diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 000000000..e16c2e461 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,6 @@ +[run] +omit = + */tests/* + */llama_stack/providers/* + */llama_stack/templates/* + .venv/* diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 54c01c80d..5884f2582 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -2,4 +2,4 @@ # These owners will be the default owners for everything in # the repo. Unless a later match takes precedence, -* @ashwinb @yanxi0830 @hardikjshah @dltn @raghotham @dineshyv @vladimirivic @sixianyi0721 @ehhuang @terrytangyuan @SLR722 @leseb +* @ashwinb @yanxi0830 @hardikjshah @raghotham @ehhuang @terrytangyuan @leseb @bbrowning diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index af2058b9a..263828e1c 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,10 +1,8 @@ # What does this PR do? -[Provide a short summary of what this PR does and why. Link to relevant issues if applicable.] + -[//]: # (If resolving an issue, uncomment and update the line below) -[//]: # (Closes #[issue-number]) + + ## Test Plan -[Describe the tests you ran to verify your changes with result summaries. *Provide clear instructions so the plan can be easily re-executed.*] - -[//]: # (## Documentation) + diff --git a/.github/TRIAGERS.md b/.github/TRIAGERS.md index d4ef6d1ac..586a5a506 100644 --- a/.github/TRIAGERS.md +++ b/.github/TRIAGERS.md @@ -1,2 +1,2 @@ # This file documents Triage members in the Llama Stack community -@franciscojavierarceo @leseb + @bbrowning @booxter @franciscojavierarceo @leseb diff --git a/.github/actions/setup-ollama/action.yml b/.github/actions/setup-ollama/action.yml new file mode 100644 index 000000000..3dd6c940c --- /dev/null +++ b/.github/actions/setup-ollama/action.yml @@ -0,0 +1,26 @@ +name: Setup Ollama +description: Start Ollama and cache model +inputs: + models: + description: Comma-separated list of models to pull + default: "llama3.2:3b-instruct-fp16,all-minilm:latest" +runs: + using: "composite" + steps: + - name: Install and start Ollama + shell: bash + run: | + # the ollama installer also starts the ollama service + curl -fsSL https://ollama.com/install.sh | sh + + # Do NOT cache models - pulling the cache is actually slower than just pulling the model. + # It takes ~45 seconds to pull the models from the cache and unpack it, but only 30 seconds to + # pull them directly. + # Maybe this is because the cache is being pulled at the same time by all the matrix jobs? + - name: Pull requested models + if: inputs.models != '' + shell: bash + run: | + for model in $(echo "${{ inputs.models }}" | tr ',' ' '); do + ollama pull "$model" + done diff --git a/.github/actions/setup-runner/action.yml b/.github/actions/setup-runner/action.yml new file mode 100644 index 000000000..6cba4fdc3 --- /dev/null +++ b/.github/actions/setup-runner/action.yml @@ -0,0 +1,22 @@ +name: Setup runner +description: Prepare a runner for the tests (install uv, python, project dependencies, etc.) +runs: + using: "composite" + steps: + - name: Install uv + uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1 + with: + python-version: "3.10" + activate-environment: true + version: 0.7.6 + + - name: Install dependencies + shell: bash + run: | + uv sync --all-groups + uv pip install ollama faiss-cpu + # always test against the latest version of the client + # TODO: this is not necessarily a good idea. 
we need to test against both published and latest + # to find out backwards compatibility issues. + uv pip install git+https://github.com/meta-llama/llama-stack-client-python.git@main + uv pip install -e . diff --git a/.github/workflows/Dockerfile b/.github/workflows/Dockerfile new file mode 100644 index 000000000..9261bd174 --- /dev/null +++ b/.github/workflows/Dockerfile @@ -0,0 +1 @@ +FROM localhost:5000/distribution-kvant:dev \ No newline at end of file diff --git a/.github/workflows/ci-playground.yaml b/.github/workflows/ci-playground.yaml new file mode 100644 index 000000000..251782855 --- /dev/null +++ b/.github/workflows/ci-playground.yaml @@ -0,0 +1,73 @@ +name: Build and Push playground container +run-name: Build and Push playground container +on: + workflow_dispatch: + #schedule: + # - cron: "0 10 * * *" + push: + branches: + - main + - kvant + tags: + - 'v*' + pull_request: + branches: + - main + - kvant +env: + IMAGE: git.kvant.cloud/${{github.repository}}-playground +jobs: + build-playground: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set current time + uses: https://github.com/gerred/actions/current-time@master + id: current_time + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to git.kvant.cloud registry + uses: docker/login-action@v3 + with: + registry: git.kvant.cloud + username: ${{ vars.ORG_PACKAGE_WRITER_USERNAME }} + password: ${{ secrets.ORG_PACKAGE_WRITER_TOKEN }} + + - name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + # list of Docker images to use as base name for tags + images: | + ${{env.IMAGE}} + # generate Docker tags based on the following events/attributes + tags: | + type=schedule + type=ref,event=branch + type=ref,event=pr + type=ref,event=tag + type=semver,pattern={{version}} + + - name: Build and push to gitea registry + uses: docker/build-push-action@v6 + with: + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + context: . 
+ file: llama_stack/distribution/ui/Containerfile + provenance: mode=max + sbom: true + build-args: | + BUILD_DATE=${{ steps.current_time.outputs.time }} + cache-from: | + type=registry,ref=${{ env.IMAGE }}:buildcache + type=registry,ref=${{ env.IMAGE }}:${{ github.ref_name }} + type=registry,ref=${{ env.IMAGE }}:main + cache-to: type=registry,ref=${{ env.IMAGE }}:buildcache,mode=max,image-manifest=true diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 000000000..87f196cc2 --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,98 @@ +name: Build and Push container +run-name: Build and Push container +on: + workflow_dispatch: + #schedule: + # - cron: "0 10 * * *" + push: + branches: + - main + - kvant + tags: + - 'v*' + pull_request: + branches: + - main + - kvant +env: + IMAGE: git.kvant.cloud/${{github.repository}} +jobs: + build: + runs-on: ubuntu-latest + services: + registry: + image: registry:2 + ports: + - 5000:5000 + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set current time + uses: https://github.com/gerred/actions/current-time@master + id: current_time + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + with: + driver-opts: network=host + + - name: Login to git.kvant.cloud registry + uses: docker/login-action@v3 + with: + registry: git.kvant.cloud + username: ${{ vars.ORG_PACKAGE_WRITER_USERNAME }} + password: ${{ secrets.ORG_PACKAGE_WRITER_TOKEN }} + + - name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + # list of Docker images to use as base name for tags + images: | + ${{env.IMAGE}} + # generate Docker tags based on the following events/attributes + tags: | + type=schedule + type=ref,event=branch + type=ref,event=pr + type=ref,event=tag + type=semver,pattern={{version}} + + - name: Install uv + uses: https://github.com/astral-sh/setup-uv@v5 + with: + # Install a specific version of uv. + version: "0.7.8" + + - name: Build + env: + USE_COPY_NOT_MOUNT: true + LLAMA_STACK_DIR: . + run: | + uvx --from . 
llama stack build --template kvant --image-type container + + # docker tag distribution-kvant:dev ${{env.IMAGE}}:kvant + # docker push ${{env.IMAGE}}:kvant + + docker tag distribution-kvant:dev localhost:5000/distribution-kvant:dev + docker push localhost:5000/distribution-kvant:dev + + - name: Build and push to gitea registry + uses: docker/build-push-action@v6 + with: + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + context: .github/workflows + provenance: mode=max + sbom: true + build-args: | + BUILD_DATE=${{ steps.current_time.outputs.time }} + cache-from: | + type=registry,ref=${{ env.IMAGE }}:buildcache + type=registry,ref=${{ env.IMAGE }}:${{ github.ref_name }} + type=registry,ref=${{ env.IMAGE }}:main + cache-to: type=registry,ref=${{ env.IMAGE }}:buildcache,mode=max,image-manifest=true diff --git a/.github/workflows/providers-build.yml b/.github/workflows/providers-build.yml deleted file mode 100644 index 915344221..000000000 --- a/.github/workflows/providers-build.yml +++ /dev/null @@ -1,83 +0,0 @@ -name: Test Llama Stack Build - -on: - push: - branches: - - main - paths: - - 'llama_stack/cli/stack/build.py' - - 'llama_stack/cli/stack/_build.py' - - 'llama_stack/distribution/build.*' - - 'llama_stack/distribution/*.sh' - - '.github/workflows/providers-build.yml' - pull_request: - paths: - - 'llama_stack/cli/stack/build.py' - - 'llama_stack/cli/stack/_build.py' - - 'llama_stack/distribution/build.*' - - 'llama_stack/distribution/*.sh' - - '.github/workflows/providers-build.yml' - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -jobs: - generate-matrix: - runs-on: ubuntu-latest - outputs: - templates: ${{ steps.set-matrix.outputs.templates }} - steps: - - name: Checkout repository - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - - name: Generate Template List - id: set-matrix - run: | - templates=$(ls llama_stack/templates/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]') - echo "templates=$templates" >> "$GITHUB_OUTPUT" - - build: - needs: generate-matrix - runs-on: ubuntu-latest - strategy: - matrix: - template: ${{ fromJson(needs.generate-matrix.outputs.templates) }} - image-type: [venv, container] - fail-fast: false # We want to run all jobs even if some fail - - steps: - - name: Checkout repository - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - - name: Set up Python - uses: actions/setup-python@8d9ed9ac5c53483de85588cdf95a591a75ab9f55 # v5.5.0 - with: - python-version: '3.10' - - - name: Install uv - uses: astral-sh/setup-uv@22695119d769bdb6f7032ad67b9bca0ef8c4a174 # v5.4.0 - with: - python-version: "3.10" - - - name: Install LlamaStack - run: | - uv venv - source .venv/bin/activate - uv pip install -e . - - - name: Print build dependencies - run: | - uv run llama stack build --template ${{ matrix.template }} --image-type ${{ matrix.image-type }} --image-name test --print-deps-only - - - name: Run Llama Stack Build - run: | - # USE_COPY_NOT_MOUNT is set to true since mounting is not supported by docker buildx, we use COPY instead - # LLAMA_STACK_DIR is set to the current directory so we are building from the source - USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. 
uv run llama stack build --template ${{ matrix.template }} --image-type ${{ matrix.image-type }} --image-name test - - - name: Print dependencies in the image - if: matrix.image-type == 'venv' - run: | - source test/bin/activate - uv pip list diff --git a/.github/workflows/test-external-providers.yml b/.github/workflows/test-external-providers.yml deleted file mode 100644 index 2ead8f845..000000000 --- a/.github/workflows/test-external-providers.yml +++ /dev/null @@ -1,93 +0,0 @@ -name: Test External Providers - -on: - push: - branches: [ main ] - pull_request: - branches: [ main ] - -jobs: - test-external-providers: - runs-on: ubuntu-latest - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Install uv - uses: astral-sh/setup-uv@v5 - with: - python-version: "3.10" - - - name: Install Ollama - run: | - curl -fsSL https://ollama.com/install.sh | sh - - - name: Pull Ollama image - run: | - ollama pull llama3.2:3b-instruct-fp16 - - - name: Start Ollama in background - run: | - nohup ollama run llama3.2:3b-instruct-fp16 --keepalive=30m > ollama.log 2>&1 & - - - name: Set Up Environment and Install Dependencies - run: | - uv sync --extra dev --extra test - uv pip install -e . - - - name: Install Ollama custom provider - run: | - mkdir -p tests/external-provider/llama-stack-provider-ollama/src/ - cp -a llama_stack/providers/remote/inference/ollama/ tests/external-provider/llama-stack-provider-ollama/src/llama_stack_provider_ollama - uv pip install tests/external-provider/llama-stack-provider-ollama - - - name: Create provider configuration - run: | - mkdir -p /tmp/providers.d/remote/inference - cp tests/external-provider/llama-stack-provider-ollama/custom_ollama.yaml /tmp/providers.d/remote/inference/custom_ollama.yaml - - - name: Wait for Ollama to start - run: | - echo "Waiting for Ollama..." - for i in {1..30}; do - if curl -s http://localhost:11434 | grep -q "Ollama is running"; then - echo "Ollama is running!" - exit 0 - fi - sleep 1 - done - echo "Ollama failed to start" - ollama ps - ollama.log - exit 1 - - - name: Start Llama Stack server in background - env: - INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct" - run: | - source .venv/bin/activate - nohup uv run llama stack run tests/external-provider/llama-stack-provider-ollama/run.yaml --image-type venv > server.log 2>&1 & - - - name: Wait for Llama Stack server to be ready - run: | - echo "Waiting for Llama Stack server..." - for i in {1..30}; do - if curl -s http://localhost:8321/v1/health | grep -q "OK"; then - echo "Llama Stack server is up!" 
- if grep -q "remote::custom_ollama from /tmp/providers.d/remote/inference/custom_ollama.yaml" server.log; then - echo "Llama Stack server is using custom Ollama provider" - exit 0 - else - echo "Llama Stack server is not using custom Ollama provider" - exit 1 - fi - fi - sleep 1 - done - echo "Llama Stack server failed to start" - cat server.log - exit 1 - - - name: run inference tests - run: | - uv run pytest -v tests/integration/inference/test_text_inference.py --stack-config="http://localhost:8321" --text-model="meta-llama/Llama-3.2-3B-Instruct" --embedding-model=all-MiniLM-L6-v2 diff --git a/.github/workflows/changelog.yml b/.github/workflows_upstream/changelog.yml similarity index 100% rename from .github/workflows/changelog.yml rename to .github/workflows_upstream/changelog.yml diff --git a/.github/workflows/gha_workflow_llama_stack_tests.yml b/.github/workflows_upstream/gha_workflow_llama_stack_tests.yml similarity index 100% rename from .github/workflows/gha_workflow_llama_stack_tests.yml rename to .github/workflows_upstream/gha_workflow_llama_stack_tests.yml diff --git a/.github/workflows_upstream/install-script-ci.yml b/.github/workflows_upstream/install-script-ci.yml new file mode 100644 index 000000000..2eb234c77 --- /dev/null +++ b/.github/workflows_upstream/install-script-ci.yml @@ -0,0 +1,26 @@ +name: Installer CI + +on: + pull_request: + paths: + - 'install.sh' + push: + paths: + - 'install.sh' + schedule: + - cron: '0 2 * * *' # every day at 02:00 UTC + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # 4.2.2 + - name: Run ShellCheck on install.sh + run: shellcheck install.sh + smoke-test: + needs: lint + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # 4.2.2 + - name: Run installer end-to-end + run: ./install.sh diff --git a/.github/workflows_upstream/integration-auth-tests.yml b/.github/workflows_upstream/integration-auth-tests.yml new file mode 100644 index 000000000..a3a746246 --- /dev/null +++ b/.github/workflows_upstream/integration-auth-tests.yml @@ -0,0 +1,132 @@ +name: Integration Auth Tests + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + paths: + - 'distributions/**' + - 'llama_stack/**' + - 'tests/integration/**' + - 'uv.lock' + - 'pyproject.toml' + - 'requirements.txt' + - '.github/workflows/integration-auth-tests.yml' # This workflow + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + test-matrix: + runs-on: ubuntu-latest + strategy: + matrix: + auth-provider: [oauth2_token] + fail-fast: false # we want to run all tests regardless of failure + + steps: + - name: Checkout repository + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + + - name: Install dependencies + uses: ./.github/actions/setup-runner + + - name: Build Llama Stack + run: | + llama stack build --template ollama --image-type venv + + - name: Install minikube + if: ${{ matrix.auth-provider == 'kubernetes' }} + uses: medyagh/setup-minikube@cea33675329b799adccc9526aa5daccc26cd5052 # v0.0.19 + + - name: Start minikube + if: ${{ matrix.auth-provider == 'oauth2_token' }} + run: | + minikube start + kubectl get pods -A + + - name: Configure Kube Auth + if: ${{ matrix.auth-provider == 'oauth2_token' }} + run: | + kubectl create namespace llama-stack + kubectl create serviceaccount llama-stack-auth -n llama-stack + kubectl create rolebinding llama-stack-auth-rolebinding 
--clusterrole=admin --serviceaccount=llama-stack:llama-stack-auth -n llama-stack + kubectl create token llama-stack-auth -n llama-stack > llama-stack-auth-token + cat <> $GITHUB_ENV + echo "KUBERNETES_CA_CERT_PATH=$(kubectl config view --minify -o jsonpath='{.clusters[0].cluster.certificate-authority}')" >> $GITHUB_ENV + echo "KUBERNETES_ISSUER=$(kubectl get --raw /.well-known/openid-configuration| jq -r .issuer)" >> $GITHUB_ENV + echo "KUBERNETES_AUDIENCE=$(kubectl create token llama-stack-auth -n llama-stack --duration=1h | cut -d. -f2 | base64 -d | jq -r '.aud[0]')" >> $GITHUB_ENV + + - name: Set Kube Auth Config and run server + env: + INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct" + if: ${{ matrix.auth-provider == 'oauth2_token' }} + run: | + run_dir=$(mktemp -d) + cat <<'EOF' > $run_dir/run.yaml + version: '2' + image_name: kube + apis: [] + providers: {} + server: + port: 8321 + EOF + yq eval '.server.auth = {"provider_type": "${{ matrix.auth-provider }}"}' -i $run_dir/run.yaml + yq eval '.server.auth.config = {"tls_cafile": "${{ env.KUBERNETES_CA_CERT_PATH }}", "issuer": "${{ env.KUBERNETES_ISSUER }}", "audience": "${{ env.KUBERNETES_AUDIENCE }}"}' -i $run_dir/run.yaml + yq eval '.server.auth.config.jwks = {"uri": "${{ env.KUBERNETES_API_SERVER_URL }}"}' -i $run_dir/run.yaml + cat $run_dir/run.yaml + + nohup uv run llama stack run $run_dir/run.yaml --image-type venv > server.log 2>&1 & + + - name: Wait for Llama Stack server to be ready + run: | + echo "Waiting for Llama Stack server..." + for i in {1..30}; do + if curl -s -L -H "Authorization: Bearer $(cat llama-stack-auth-token)" http://localhost:8321/v1/health | grep -q "OK"; then + echo "Llama Stack server is up!" + if grep -q "Enabling authentication with provider: ${{ matrix.auth-provider }}" server.log; then + echo "Llama Stack server is configured to use ${{ matrix.auth-provider }} auth" + exit 0 + else + echo "Llama Stack server is not configured to use ${{ matrix.auth-provider }} auth" + cat server.log + exit 1 + fi + fi + sleep 1 + done + echo "Llama Stack server failed to start" + cat server.log + exit 1 + + - name: Test auth + run: | + curl -s -L -H "Authorization: Bearer $(cat llama-stack-auth-token)" http://127.0.0.1:8321/v1/providers|jq diff --git a/.github/workflows/integration-tests.yml b/.github/workflows_upstream/integration-tests.yml similarity index 57% rename from .github/workflows/integration-tests.yml rename to .github/workflows_upstream/integration-tests.yml index 665f8bd7e..d78e82c9d 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows_upstream/integration-tests.yml @@ -6,7 +6,6 @@ on: pull_request: branches: [ main ] paths: - - 'distributions/**' - 'llama_stack/**' - 'tests/integration/**' - 'uv.lock' @@ -25,7 +24,7 @@ jobs: matrix: # Listing tests manually since some of them currently fail # TODO: generate matrix list from tests/integration when fixed - test-type: [agents, inference, datasets, inspect, scoring, post_training, providers] + test-type: [agents, inference, datasets, inspect, scoring, post_training, providers, tool_runtime] client-type: [library, http] fail-fast: false # we want to run all tests regardless of failure @@ -33,56 +32,22 @@ jobs: - name: Checkout repository uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - name: Install uv - uses: astral-sh/setup-uv@22695119d769bdb6f7032ad67b9bca0ef8c4a174 # v5.4.0 - with: - python-version: "3.10" + - name: Install dependencies + uses: ./.github/actions/setup-runner - - name: Install Ollama 
- run: | - curl -fsSL https://ollama.com/install.sh | sh + - name: Setup ollama + uses: ./.github/actions/setup-ollama - - name: Pull Ollama image + - name: Build Llama Stack run: | - ollama pull llama3.2:3b-instruct-fp16 - - - name: Start Ollama in background - run: | - nohup ollama run llama3.2:3b-instruct-fp16 > ollama.log 2>&1 & - - - name: Set Up Environment and Install Dependencies - run: | - uv sync --extra dev --extra test - uv pip install ollama faiss-cpu - # always test against the latest version of the client - # TODO: this is not necessarily a good idea. we need to test against both published and latest - # to find out backwards compatibility issues. - uv pip install git+https://github.com/meta-llama/llama-stack-client-python.git@main - uv pip install -e . llama stack build --template ollama --image-type venv - - name: Wait for Ollama to start - run: | - echo "Waiting for Ollama..." - for i in {1..30}; do - if curl -s http://localhost:11434 | grep -q "Ollama is running"; then - echo "Ollama is running!" - exit 0 - fi - sleep 1 - done - echo "Ollama failed to start" - ollama ps - ollama.log - exit 1 - - name: Start Llama Stack server in background if: matrix.client-type == 'http' env: INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct" run: | - source .venv/bin/activate - nohup uv run llama stack run ./llama_stack/templates/ollama/run.yaml --image-type venv > server.log 2>&1 & + LLAMA_STACK_LOG_FILE=server.log nohup uv run llama stack run ./llama_stack/templates/ollama/run.yaml --image-type venv & - name: Wait for Llama Stack server to be ready if: matrix.client-type == 'http' @@ -99,6 +64,23 @@ jobs: cat server.log exit 1 + - name: Verify Ollama status is OK + if: matrix.client-type == 'http' + run: | + echo "Verifying Ollama status..." + ollama_status=$(curl -s -L http://127.0.0.1:8321/v1/providers/ollama|jq --raw-output .health.status) + echo "Ollama status: $ollama_status" + if [ "$ollama_status" != "OK" ]; then + echo "Ollama health check failed" + exit 1 + fi + + - name: Check Storage and Memory Available Before Tests + if: ${{ always() }} + run: | + free -h + df -h + - name: Run Integration Tests env: INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct" @@ -108,7 +90,27 @@ jobs: else stack_config="http://localhost:8321" fi - uv run pytest -v tests/integration/${{ matrix.test-type }} --stack-config=${stack_config} \ + uv run pytest -s -v tests/integration/${{ matrix.test-type }} --stack-config=${stack_config} \ -k "not(builtin_tool or safety_with_image or code_interpreter or test_rag)" \ --text-model="meta-llama/Llama-3.2-3B-Instruct" \ --embedding-model=all-MiniLM-L6-v2 + + - name: Check Storage and Memory Available After Tests + if: ${{ always() }} + run: | + free -h + df -h + + - name: Write ollama logs to file + if: ${{ always() }} + run: | + sudo journalctl -u ollama.service > ollama.log + + - name: Upload all logs to artifacts + if: ${{ always() }} + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 + with: + name: logs-${{ github.run_id }}-${{ github.run_attempt }}-${{ matrix.client-type }}-${{ matrix.test-type }} + path: | + *.log + retention-days: 1 diff --git a/.github/workflows/pre-commit.yml b/.github/workflows_upstream/pre-commit.yml similarity index 60% rename from .github/workflows/pre-commit.yml rename to .github/workflows_upstream/pre-commit.yml index 847aaecd7..2bbd52c53 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows_upstream/pre-commit.yml @@ -18,7 +18,7 @@ jobs: uses: 
actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Set up Python - uses: actions/setup-python@8d9ed9ac5c53483de85588cdf95a591a75ab9f55 # v5.5.0 + uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 with: python-version: '3.11' cache: pip @@ -27,7 +27,19 @@ jobs: .pre-commit-config.yaml - uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1 + env: + SKIP: no-commit-to-branch + RUFF_OUTPUT_FORMAT: github - name: Verify if there are any diff files after pre-commit run: | git diff --exit-code || (echo "There are uncommitted changes, run pre-commit locally and commit again" && exit 1) + + - name: Verify if there are any new files after pre-commit + run: | + unstaged_files=$(git ls-files --others --exclude-standard) + if [ -n "$unstaged_files" ]; then + echo "There are uncommitted new files, run pre-commit locally and commit again" + echo "$unstaged_files" + exit 1 + fi diff --git a/.github/workflows_upstream/providers-build.yml b/.github/workflows_upstream/providers-build.yml new file mode 100644 index 000000000..cf53459b9 --- /dev/null +++ b/.github/workflows_upstream/providers-build.yml @@ -0,0 +1,147 @@ +name: Test Llama Stack Build + +on: + push: + branches: + - main + paths: + - 'llama_stack/cli/stack/build.py' + - 'llama_stack/cli/stack/_build.py' + - 'llama_stack/distribution/build.*' + - 'llama_stack/distribution/*.sh' + - '.github/workflows/providers-build.yml' + pull_request: + paths: + - 'llama_stack/cli/stack/build.py' + - 'llama_stack/cli/stack/_build.py' + - 'llama_stack/distribution/build.*' + - 'llama_stack/distribution/*.sh' + - '.github/workflows/providers-build.yml' + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + generate-matrix: + runs-on: ubuntu-latest + outputs: + templates: ${{ steps.set-matrix.outputs.templates }} + steps: + - name: Checkout repository + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + + - name: Generate Template List + id: set-matrix + run: | + templates=$(ls llama_stack/templates/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]') + echo "templates=$templates" >> "$GITHUB_OUTPUT" + + build: + needs: generate-matrix + runs-on: ubuntu-latest + strategy: + matrix: + template: ${{ fromJson(needs.generate-matrix.outputs.templates) }} + image-type: [venv, container] + fail-fast: false # We want to run all jobs even if some fail + + steps: + - name: Checkout repository + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + + - name: Install dependencies + uses: ./.github/actions/setup-runner + + - name: Print build dependencies + run: | + uv run llama stack build --template ${{ matrix.template }} --image-type ${{ matrix.image-type }} --image-name test --print-deps-only + + - name: Run Llama Stack Build + run: | + # USE_COPY_NOT_MOUNT is set to true since mounting is not supported by docker buildx, we use COPY instead + # LLAMA_STACK_DIR is set to the current directory so we are building from the source + USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. 
uv run llama stack build --template ${{ matrix.template }} --image-type ${{ matrix.image-type }} --image-name test + + - name: Print dependencies in the image + if: matrix.image-type == 'venv' + run: | + uv pip list + + build-single-provider: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + + - name: Install dependencies + uses: ./.github/actions/setup-runner + + - name: Build a single provider + run: | + USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --image-type venv --image-name test --providers inference=remote::ollama + + build-custom-container-distribution: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + + - name: Install dependencies + uses: ./.github/actions/setup-runner + + - name: Build a single provider + run: | + yq -i '.image_type = "container"' llama_stack/templates/starter/build.yaml + yq -i '.image_name = "test"' llama_stack/templates/starter/build.yaml + USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config llama_stack/templates/starter/build.yaml + + - name: Inspect the container image entrypoint + run: | + IMAGE_ID=$(docker images --format "{{.Repository}}:{{.Tag}}" | head -n 1) + entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID) + echo "Entrypoint: $entrypoint" + if [ "$entrypoint" != "[python -m llama_stack.distribution.server.server --config /app/run.yaml]" ]; then + echo "Entrypoint is not correct" + exit 1 + fi + + build-ubi9-container-distribution: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + + - name: Install dependencies + uses: ./.github/actions/setup-runner + + - name: Pin template to UBI9 base + run: | + yq -i ' + .image_type = "container" | + .image_name = "ubi9-test" | + .distribution_spec.container_image = "registry.access.redhat.com/ubi9:latest" + ' llama_stack/templates/starter/build.yaml + + - name: Build dev container (UBI9) + env: + USE_COPY_NOT_MOUNT: "true" + LLAMA_STACK_DIR: "." 
+ run: | + uv run llama stack build --config llama_stack/templates/starter/build.yaml + + - name: Inspect UBI9 image + run: | + IMAGE_ID=$(docker images --format "{{.Repository}}:{{.Tag}}" | head -n 1) + entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID) + echo "Entrypoint: $entrypoint" + if [ "$entrypoint" != "[python -m llama_stack.distribution.server.server --config /app/run.yaml]" ]; then + echo "Entrypoint is not correct" + exit 1 + fi + + echo "Checking /etc/os-release in $IMAGE_ID" + docker run --rm --entrypoint sh "$IMAGE_ID" -c \ + 'source /etc/os-release && echo "$ID"' \ + | grep -qE '^(rhel|ubi)$' \ + || { echo "Base image is not UBI 9!"; exit 1; } diff --git a/.github/workflows/semantic-pr.yml b/.github/workflows_upstream/semantic-pr.yml similarity index 100% rename from .github/workflows/semantic-pr.yml rename to .github/workflows_upstream/semantic-pr.yml diff --git a/.github/workflows/stale_bot.yml b/.github/workflows_upstream/stale_bot.yml similarity index 100% rename from .github/workflows/stale_bot.yml rename to .github/workflows_upstream/stale_bot.yml diff --git a/.github/workflows_upstream/test-external-providers.yml b/.github/workflows_upstream/test-external-providers.yml new file mode 100644 index 000000000..06ab7cf3c --- /dev/null +++ b/.github/workflows_upstream/test-external-providers.yml @@ -0,0 +1,71 @@ +name: Test External Providers + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + paths: + - 'llama_stack/**' + - 'tests/integration/**' + - 'uv.lock' + - 'pyproject.toml' + - 'requirements.txt' + - '.github/workflows/test-external-providers.yml' # This workflow + +jobs: + test-external-providers: + runs-on: ubuntu-latest + strategy: + matrix: + image-type: [venv] + # We don't do container yet, it's tricky to install a package from the host into the + # container and point 'uv pip install' to the correct path... + steps: + - name: Checkout repository + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + + - name: Install dependencies + uses: ./.github/actions/setup-runner + + - name: Apply image type to config file + run: | + yq -i '.image_type = "${{ matrix.image-type }}"' tests/external-provider/llama-stack-provider-ollama/custom-distro.yaml + cat tests/external-provider/llama-stack-provider-ollama/custom-distro.yaml + + - name: Setup directory for Ollama custom provider + run: | + mkdir -p tests/external-provider/llama-stack-provider-ollama/src/ + cp -a llama_stack/providers/remote/inference/ollama/ tests/external-provider/llama-stack-provider-ollama/src/llama_stack_provider_ollama + + - name: Create provider configuration + run: | + mkdir -p /home/runner/.llama/providers.d/remote/inference + cp tests/external-provider/llama-stack-provider-ollama/custom_ollama.yaml /home/runner/.llama/providers.d/remote/inference/custom_ollama.yaml + + - name: Build distro from config file + run: | + USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config tests/external-provider/llama-stack-provider-ollama/custom-distro.yaml + + - name: Start Llama Stack server in background + if: ${{ matrix.image-type }} == 'venv' + env: + INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct" + run: | + uv run pip list + nohup uv run --active llama stack run tests/external-provider/llama-stack-provider-ollama/run.yaml --image-type ${{ matrix.image-type }} > server.log 2>&1 & + + - name: Wait for Llama Stack server to be ready + run: | + for i in {1..30}; do + if ! 
grep -q "remote::custom_ollama from /home/runner/.llama/providers.d/remote/inference/custom_ollama.yaml" server.log; then + echo "Waiting for Llama Stack server to load the provider..." + sleep 1 + else + echo "Provider loaded" + exit 0 + fi + done + echo "Provider failed to load" + cat server.log + exit 1 diff --git a/.github/workflows/tests.yml b/.github/workflows_upstream/tests.yml similarity index 100% rename from .github/workflows/tests.yml rename to .github/workflows_upstream/tests.yml diff --git a/.github/workflows/unit-tests.yml b/.github/workflows_upstream/unit-tests.yml similarity index 72% rename from .github/workflows/unit-tests.yml rename to .github/workflows_upstream/unit-tests.yml index da7289afc..fc0459f0f 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows_upstream/unit-tests.yml @@ -6,7 +6,6 @@ on: pull_request: branches: [ main ] paths: - - 'distributions/**' - 'llama_stack/**' - 'tests/unit/**' - 'uv.lock' @@ -31,17 +30,11 @@ jobs: - "3.12" - "3.13" steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - name: Checkout repository + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - name: Set up Python ${{ matrix.python }} - uses: actions/setup-python@8d9ed9ac5c53483de85588cdf95a591a75ab9f55 # v5.5.0 - with: - python-version: ${{ matrix.python }} - - - uses: astral-sh/setup-uv@22695119d769bdb6f7032ad67b9bca0ef8c4a174 # v5.4.0 - with: - python-version: ${{ matrix.python }} - enable-cache: false + - name: Install dependencies + uses: ./.github/actions/setup-runner - name: Run unit tests run: | diff --git a/.github/workflows/update-readthedocs.yml b/.github/workflows_upstream/update-readthedocs.yml similarity index 78% rename from .github/workflows/update-readthedocs.yml rename to .github/workflows_upstream/update-readthedocs.yml index 74bf0d0b0..981332a77 100644 --- a/.github/workflows/update-readthedocs.yml +++ b/.github/workflows_upstream/update-readthedocs.yml @@ -14,6 +14,8 @@ on: - 'docs/**' - 'pyproject.toml' - '.github/workflows/update-readthedocs.yml' + tags: + - '*' pull_request: branches: - main @@ -35,16 +37,8 @@ jobs: - name: Checkout repository uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - name: Set up Python - uses: actions/setup-python@8d9ed9ac5c53483de85588cdf95a591a75ab9f55 # v5.5.0 - with: - python-version: '3.11' - - - name: Install the latest version of uv - uses: astral-sh/setup-uv@22695119d769bdb6f7032ad67b9bca0ef8c4a174 # v5.4.0 - - - name: Sync with uv - run: uv sync --extra docs + - name: Install dependencies + uses: ./.github/actions/setup-runner - name: Build HTML run: | @@ -61,7 +55,10 @@ jobs: response=$(curl -X POST \ -H "Content-Type: application/json" \ - -d "{\"token\": \"$TOKEN\"}" \ + -d "{ + \"token\": \"$TOKEN\", + \"version\": \"$GITHUB_REF_NAME\" + }" \ https://readthedocs.org/api/v2/webhook/llama-stack/289768/) echo "Response: $response" diff --git a/.gitignore b/.gitignore index 0ef25cdf1..747acdc7b 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ dev_requirements.txt build .DS_Store llama_stack/configs/* +.cursor/ xcuserdata/ *.hmap .DS_Store @@ -23,3 +24,4 @@ venv/ pytest-report.xml .coverage .python-version +data diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ff3bc1250..aaec469e4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,6 +15,18 @@ repos: args: ['--maxkb=1000'] - id: end-of-file-fixer exclude: '^(.*\.svg)$' + - id: no-commit-to-branch + - id: check-yaml + args: 
["--unsafe"] + - id: detect-private-key + - id: requirements-txt-fixer + - id: mixed-line-ending + args: [--fix=lf] # Forces to replace line ending by LF (line feed) + - id: check-executables-have-shebangs + - id: check-json + - id: check-shebang-scripts-are-executable + - id: check-symlinks + - id: check-toml - repo: https://github.com/Lucas-C/pre-commit-hooks rev: v1.5.4 @@ -41,7 +53,7 @@ repos: - black==24.3.0 - repo: https://github.com/astral-sh/uv-pre-commit - rev: 0.6.3 + rev: 0.7.8 hooks: - id: uv-lock - id: uv-export @@ -49,6 +61,7 @@ repos: "--frozen", "--no-hashes", "--no-emit-project", + "--no-default-groups", "--output-file=requirements.txt" ] @@ -76,24 +89,29 @@ repos: - id: distro-codegen name: Distribution Template Codegen additional_dependencies: - - uv==0.6.0 - entry: uv run --extra codegen ./scripts/distro_codegen.py + - uv==0.7.8 + entry: uv run --group codegen ./scripts/distro_codegen.py language: python pass_filenames: false require_serial: true files: ^llama_stack/templates/.*$|^llama_stack/providers/.*/inference/.*/models\.py$ - -- repo: local - hooks: - id: openapi-codegen name: API Spec Codegen additional_dependencies: - - uv==0.6.2 - entry: sh -c 'uv run --with ".[dev]" ./docs/openapi_generator/run_openapi_generator.sh > /dev/null' + - uv==0.7.8 + entry: sh -c 'uv run ./docs/openapi_generator/run_openapi_generator.sh > /dev/null' language: python pass_filenames: false require_serial: true files: ^llama_stack/apis/|^docs/openapi_generator/ + - id: check-workflows-use-hashes + name: Check GitHub Actions use SHA-pinned actions + entry: ./scripts/check-workflows-use-hashes.sh + language: system + pass_filenames: false + require_serial: true + always_run: true + files: ^\.github/workflows/.*\.ya?ml$ ci: autofix_commit_msg: 🎨 [pre-commit.ci] Auto format from pre-commit.com hooks diff --git a/.readthedocs.yaml b/.readthedocs.yaml index f114dbf9b..461977a6c 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -5,28 +5,21 @@ # Required version: 2 +# Build documentation in the "docs/" directory with Sphinx +sphinx: + configuration: docs/source/conf.py + # Set the OS, Python version and other tools you might need build: os: ubuntu-22.04 tools: python: "3.12" - # You can also specify other tool versions: - # nodejs: "19" - # rust: "1.64" - # golang: "1.19" - -# Build documentation in the "docs/" directory with Sphinx -sphinx: - configuration: docs/source/conf.py - -# Optionally build your docs in additional formats such as PDF and ePub -# formats: -# - pdf -# - epub - -# Optional but recommended, declare the Python requirements required -# to build your documentation -# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html -python: - install: - - requirements: docs/requirements.txt + jobs: + pre_create_environment: + - asdf plugin add uv + - asdf install uv latest + - asdf global uv latest + create_environment: + - uv venv "${READTHEDOCS_VIRTUALENV_PATH}" + install: + - UV_PROJECT_ENVIRONMENT="${READTHEDOCS_VIRTUALENV_PATH}" uv sync --frozen --group docs diff --git a/CHANGELOG.md b/CHANGELOG.md index 5086094ad..f7644a5af 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,75 @@ # Changelog +# v0.2.7 +Published on: 2025-05-16T20:38:10Z + +## Highlights + +This is a small update. But a couple highlights: + +* feat: function tools in OpenAI Responses by @bbrowning in https://github.com/meta-llama/llama-stack/pull/2094, getting closer to ready. Streaming is the next missing piece. 
+* feat: Adding support for customizing chunk context in RAG insertion and querying by @franciscojavierarceo in https://github.com/meta-llama/llama-stack/pull/2134 +* feat: scaffolding for Llama Stack UI by @ehhuang in https://github.com/meta-llama/llama-stack/pull/2149, more to come in the coming releases. + + +--- + +# v0.2.6 +Published on: 2025-05-12T18:06:52Z + + + +--- + +# v0.2.5 +Published on: 2025-05-04T20:16:49Z + + + +--- + +# v0.2.4 +Published on: 2025-04-29T17:26:01Z + +## Highlights + +* One-liner to install and run Llama Stack yay! by @reluctantfuturist in https://github.com/meta-llama/llama-stack/pull/1383 +* support for NVIDIA NeMo datastore by @raspawar in https://github.com/meta-llama/llama-stack/pull/1852 +* (yuge!) Kubernetes authentication by @leseb in https://github.com/meta-llama/llama-stack/pull/1778 +* (yuge!) OpenAI Responses API by @bbrowning in https://github.com/meta-llama/llama-stack/pull/1989 +* add api.llama provider, llama-guard-4 model by @ashwinb in https://github.com/meta-llama/llama-stack/pull/2058 + + +--- + +# v0.2.3 +Published on: 2025-04-25T22:46:21Z + +## Highlights + +* OpenAI compatible inference endpoints and client-SDK support. `client.chat.completions.create()` now works. +* significant improvements and functionality added to the nVIDIA distribution +* many improvements to the test verification suite. +* new inference providers: Ramalama, IBM WatsonX +* many improvements to the Playground UI + + +--- + +# v0.2.2 +Published on: 2025-04-13T01:19:49Z + +## Main changes + +- Bring Your Own Provider (@leseb) - use out-of-tree provider code to execute the distribution server +- OpenAI compatible inference API in progress (@bbrowning) +- Provider verifications (@ehhuang) +- Many updates and fixes to playground +- Several llama4 related fixes + + +--- + # v0.2.1 Published on: 2025-04-05T23:13:00Z @@ -10,10 +80,10 @@ Published on: 2025-04-05T23:13:00Z # v0.2.0 Published on: 2025-04-05T19:04:29Z -## Llama 4 Support - -Checkout more at https://www.llama.com - +## Llama 4 Support + +Checkout more at https://www.llama.com + --- @@ -21,58 +91,58 @@ Checkout more at https://www.llama.com # v0.1.9 Published on: 2025-03-29T00:52:23Z -### Build and Test Agents -* Agents: Entire document context with attachments -* RAG: Documentation with sqlite-vec faiss comparison -* Getting started: Fixes to getting started notebook. - -### Agent Evals and Model Customization -* (**New**) Post-training: Add nemo customizer - -### Better Engineering -* Moved sqlite-vec to non-blocking calls -* Don't return a payload on file delete - - +### Build and Test Agents +* Agents: Entire document context with attachments +* RAG: Documentation with sqlite-vec faiss comparison +* Getting started: Fixes to getting started notebook. + +### Agent Evals and Model Customization +* (**New**) Post-training: Add nemo customizer + +### Better Engineering +* Moved sqlite-vec to non-blocking calls +* Don't return a payload on file delete + + --- # v0.1.8 Published on: 2025-03-24T01:28:50Z -# v0.1.8 Release Notes - -### Build and Test Agents -* Safety: Integrated NVIDIA as a safety provider. -* VectorDB: Added Qdrant as an inline provider. -* Agents: Added support for multiple tool groups in agents. -* Agents: Simplified imports for Agents in client package - - -### Agent Evals and Model Customization -* Introduced DocVQA and IfEval benchmarks. - -### Deploying and Monitoring Agents -* Introduced a Containerfile and image workflow for the Playground. 
-* Implemented support for Bearer (API Key) authentication. -* Added attribute-based access control for resources. -* Fixes on docker deployments: use --pull always and standardized the default port to 8321 -* Deprecated: /v1/inspect/providers use /v1/providers/ instead - -### Better Engineering -* Consolidated scripts under the ./scripts directory. -* Addressed mypy violations in various modules. -* Added Dependabot scans for Python dependencies. -* Implemented a scheduled workflow to update the changelog automatically. -* Enforced concurrency to reduce CI loads. - - -### New Contributors -* @cmodi-meta made their first contribution in https://github.com/meta-llama/llama-stack/pull/1650 -* @jeffmaury made their first contribution in https://github.com/meta-llama/llama-stack/pull/1671 -* @derekhiggins made their first contribution in https://github.com/meta-llama/llama-stack/pull/1698 -* @Bobbins228 made their first contribution in https://github.com/meta-llama/llama-stack/pull/1745 - +# v0.1.8 Release Notes + +### Build and Test Agents +* Safety: Integrated NVIDIA as a safety provider. +* VectorDB: Added Qdrant as an inline provider. +* Agents: Added support for multiple tool groups in agents. +* Agents: Simplified imports for Agents in client package + + +### Agent Evals and Model Customization +* Introduced DocVQA and IfEval benchmarks. + +### Deploying and Monitoring Agents +* Introduced a Containerfile and image workflow for the Playground. +* Implemented support for Bearer (API Key) authentication. +* Added attribute-based access control for resources. +* Fixes on docker deployments: use --pull always and standardized the default port to 8321 +* Deprecated: /v1/inspect/providers use /v1/providers/ instead + +### Better Engineering +* Consolidated scripts under the ./scripts directory. +* Addressed mypy violations in various modules. +* Added Dependabot scans for Python dependencies. +* Implemented a scheduled workflow to update the changelog automatically. +* Enforced concurrency to reduce CI loads. 
+ + +### New Contributors +* @cmodi-meta made their first contribution in https://github.com/meta-llama/llama-stack/pull/1650 +* @jeffmaury made their first contribution in https://github.com/meta-llama/llama-stack/pull/1671 +* @derekhiggins made their first contribution in https://github.com/meta-llama/llama-stack/pull/1698 +* @Bobbins228 made their first contribution in https://github.com/meta-llama/llama-stack/pull/1745 + **Full Changelog**: https://github.com/meta-llama/llama-stack/compare/v0.1.7...v0.1.8 --- @@ -80,73 +150,73 @@ Published on: 2025-03-24T01:28:50Z # v0.1.7 Published on: 2025-03-14T22:30:51Z -## 0.1.7 Release Notes - -### Build and Test Agents -* Inference: ImageType is now refactored to LlamaStackImageType -* Inference: Added tests to measure TTFT -* Inference: Bring back usage metrics -* Agents: Added endpoint for get agent, list agents and list sessions -* Agents: Automated conversion of type hints in client tool for lite llm format -* Agents: Deprecated ToolResponseMessage in agent.resume API -* Added Provider API for listing and inspecting provider info - -### Agent Evals and Model Customization -* Eval: Added new eval benchmarks Math 500 and BFCL v3 -* Deploy and Monitoring of Agents -* Telemetry: Fix tracing to work across coroutines - -### Better Engineering -* Display code coverage for unit tests -* Updated call sites (inference, tool calls, agents) to move to async non blocking calls -* Unit tests also run on Python 3.11, 3.12, and 3.13 -* Added ollama inference to Integration tests CI -* Improved documentation across examples, testing, CLI, updated providers table ) - - - +## 0.1.7 Release Notes + +### Build and Test Agents +* Inference: ImageType is now refactored to LlamaStackImageType +* Inference: Added tests to measure TTFT +* Inference: Bring back usage metrics +* Agents: Added endpoint for get agent, list agents and list sessions +* Agents: Automated conversion of type hints in client tool for lite llm format +* Agents: Deprecated ToolResponseMessage in agent.resume API +* Added Provider API for listing and inspecting provider info + +### Agent Evals and Model Customization +* Eval: Added new eval benchmarks Math 500 and BFCL v3 +* Deploy and Monitoring of Agents +* Telemetry: Fix tracing to work across coroutines + +### Better Engineering +* Display code coverage for unit tests +* Updated call sites (inference, tool calls, agents) to move to async non blocking calls +* Unit tests also run on Python 3.11, 3.12, and 3.13 +* Added ollama inference to Integration tests CI +* Improved documentation across examples, testing, CLI, updated providers table ) + + + --- # v0.1.6 Published on: 2025-03-08T04:35:08Z -## 0.1.6 Release Notes - -### Build and Test Agents -* Inference: Fixed support for inline vllm provider -* (**New**) Agent: Build & Monitor Agent Workflows with Llama Stack + Anthropic's Best Practice [Notebook](https://github.com/meta-llama/llama-stack/blob/main/docs/notebooks/Llama_Stack_Agent_Workflows.ipynb) -* (**New**) Agent: Revamped agent [documentation](https://llama-stack.readthedocs.io/en/latest/building_applications/agent.html) with more details and examples -* Agent: Unify tools and Python SDK Agents API -* Agent: AsyncAgent Python SDK wrapper supporting async client tool calls -* Agent: Support python functions without @client_tool decorator as client tools -* Agent: deprecation for allow_resume_turn flag, and remove need to specify tool_prompt_format -* VectorIO: MilvusDB support added - -### Agent Evals and Model Customization -* 
(**New**) Agent: Llama Stack RAG Lifecycle [Notebook](https://github.com/meta-llama/llama-stack/blob/main/docs/notebooks/Llama_Stack_RAG_Lifecycle.ipynb) -* Eval: Documentation for eval, scoring, adding new benchmarks -* Eval: Distribution template to run benchmarks on llama & non-llama models -* Eval: Ability to register new custom LLM-as-judge scoring functions -* (**New**) Looking for contributors for open benchmarks. See [documentation](https://llama-stack.readthedocs.io/en/latest/references/evals_reference/index.html#open-benchmark-contributing-guide) for details. - -### Deploy and Monitoring of Agents -* Better support for different log levels across all components for better monitoring - -### Better Engineering -* Enhance OpenAPI spec to include Error types across all APIs -* Moved all tests to /tests and created unit tests to run on each PR -* Removed all dependencies on llama-models repo - +## 0.1.6 Release Notes + +### Build and Test Agents +* Inference: Fixed support for inline vllm provider +* (**New**) Agent: Build & Monitor Agent Workflows with Llama Stack + Anthropic's Best Practice [Notebook](https://github.com/meta-llama/llama-stack/blob/main/docs/notebooks/Llama_Stack_Agent_Workflows.ipynb) +* (**New**) Agent: Revamped agent [documentation](https://llama-stack.readthedocs.io/en/latest/building_applications/agent.html) with more details and examples +* Agent: Unify tools and Python SDK Agents API +* Agent: AsyncAgent Python SDK wrapper supporting async client tool calls +* Agent: Support python functions without @client_tool decorator as client tools +* Agent: deprecation for allow_resume_turn flag, and remove need to specify tool_prompt_format +* VectorIO: MilvusDB support added + +### Agent Evals and Model Customization +* (**New**) Agent: Llama Stack RAG Lifecycle [Notebook](https://github.com/meta-llama/llama-stack/blob/main/docs/notebooks/Llama_Stack_RAG_Lifecycle.ipynb) +* Eval: Documentation for eval, scoring, adding new benchmarks +* Eval: Distribution template to run benchmarks on llama & non-llama models +* Eval: Ability to register new custom LLM-as-judge scoring functions +* (**New**) Looking for contributors for open benchmarks. See [documentation](https://llama-stack.readthedocs.io/en/latest/references/evals_reference/index.html#open-benchmark-contributing-guide) for details. 
+ +### Deploy and Monitoring of Agents +* Better support for different log levels across all components for better monitoring + +### Better Engineering +* Enhance OpenAPI spec to include Error types across all APIs +* Moved all tests to /tests and created unit tests to run on each PR +* Removed all dependencies on llama-models repo + --- # v0.1.5.1 Published on: 2025-02-28T22:37:44Z -## 0.1.5.1 Release Notes -* Fixes for security risk in https://github.com/meta-llama/llama-stack/pull/1327 and https://github.com/meta-llama/llama-stack/pull/1328 - +## 0.1.5.1 Release Notes +* Fixes for security risk in https://github.com/meta-llama/llama-stack/pull/1327 and https://github.com/meta-llama/llama-stack/pull/1328 + **Full Changelog**: https://github.com/meta-llama/llama-stack/compare/v0.1.5...v0.1.5.1 --- @@ -154,176 +224,176 @@ Published on: 2025-02-28T22:37:44Z # v0.1.5 Published on: 2025-02-28T18:14:01Z -## 0.1.5 Release Notes -### Build Agents -* Inference: Support more non-llama models (openai, anthropic, gemini) -* Inference: Can use the provider's model name in addition to the HF alias -* Inference: Fixed issues with calling tools that weren't specified in the prompt -* RAG: Improved system prompt for RAG and no more need for hard-coded rag-tool calling -* Embeddings: Added support for Nemo retriever embedding models -* Tools: Added support for MCP tools in Ollama Distribution -* Distributions: Added new Groq distribution - -### Customize Models -* Save post-trained checkpoint in SafeTensor format to allow Ollama inference provider to use the post-trained model - -### Monitor agents -* More comprehensive logging of agent steps including client tools -* Telemetry inputs/outputs are now structured and queryable -* Ability to retrieve agents session, turn, step by ids - -### Better Engineering -* Moved executorch Swift code out of this repo into the llama-stack-client-swift repo, similar to kotlin -* Move most logging to use logger instead of prints -* Completed text /chat-completion and /completion tests - +## 0.1.5 Release Notes +### Build Agents +* Inference: Support more non-llama models (openai, anthropic, gemini) +* Inference: Can use the provider's model name in addition to the HF alias +* Inference: Fixed issues with calling tools that weren't specified in the prompt +* RAG: Improved system prompt for RAG and no more need for hard-coded rag-tool calling +* Embeddings: Added support for Nemo retriever embedding models +* Tools: Added support for MCP tools in Ollama Distribution +* Distributions: Added new Groq distribution + +### Customize Models +* Save post-trained checkpoint in SafeTensor format to allow Ollama inference provider to use the post-trained model + +### Monitor agents +* More comprehensive logging of agent steps including client tools +* Telemetry inputs/outputs are now structured and queryable +* Ability to retrieve agents session, turn, step by ids + +### Better Engineering +* Moved executorch Swift code out of this repo into the llama-stack-client-swift repo, similar to kotlin +* Move most logging to use logger instead of prints +* Completed text /chat-completion and /completion tests + --- # v0.1.4 Published on: 2025-02-25T00:02:43Z -## v0.1.4 Release Notes -Here are the key changes coming as part of this release: - -### Build and Test Agents -* Inference: Added support for non-llama models -* Inference: Added option to list all downloaded models and remove models -* Agent: Introduce new api agents.resume_turn to include client side tool execution in the same turn 
-* Agent: AgentConfig introduces new variable “tool_config” that allows for better tool configuration and system prompt overrides -* Agent: Added logging for agent step start and completion times -* Agent: Added support for logging for tool execution metadata -* Embedding: Updated /inference/embeddings to support asymmetric models, truncation and variable sized outputs -* Embedding: Updated embedding models for Ollama, Together, and Fireworks with available defaults -* VectorIO: Improved performance of sqlite-vec using chunked writes -### Agent Evals and Model Customization -* Deprecated api /eval-tasks. Use /eval/benchmark instead -* Added CPU training support for TorchTune -### Deploy and Monitoring of Agents -* Consistent view of client and server tool calls in telemetry -### Better Engineering -* Made tests more data-driven for consistent evaluation -* Fixed documentation links and improved API reference generation -* Various small fixes for build scripts and system reliability - - +## v0.1.4 Release Notes +Here are the key changes coming as part of this release: + +### Build and Test Agents +* Inference: Added support for non-llama models +* Inference: Added option to list all downloaded models and remove models +* Agent: Introduce new api agents.resume_turn to include client side tool execution in the same turn +* Agent: AgentConfig introduces new variable “tool_config” that allows for better tool configuration and system prompt overrides +* Agent: Added logging for agent step start and completion times +* Agent: Added support for logging for tool execution metadata +* Embedding: Updated /inference/embeddings to support asymmetric models, truncation and variable sized outputs +* Embedding: Updated embedding models for Ollama, Together, and Fireworks with available defaults +* VectorIO: Improved performance of sqlite-vec using chunked writes +### Agent Evals and Model Customization +* Deprecated api /eval-tasks. Use /eval/benchmark instead +* Added CPU training support for TorchTune +### Deploy and Monitoring of Agents +* Consistent view of client and server tool calls in telemetry +### Better Engineering +* Made tests more data-driven for consistent evaluation +* Fixed documentation links and improved API reference generation +* Various small fixes for build scripts and system reliability + + --- # v0.1.3 Published on: 2025-02-14T20:24:32Z -## v0.1.3 Release - -Here are some key changes that are coming as part of this release. 
- -### Build and Test Agents -Streamlined the initial development experience -- Added support for llama stack run --image-type venv -- Enhanced vector store options with new sqlite-vec provider and improved Qdrant integration -- vLLM improvements for tool calling and logprobs -- Better handling of sporadic code_interpreter tool calls - -### Agent Evals -Better benchmarking and Agent performance assessment -- Renamed eval API /eval-task to /benchmarks -- Improved documentation and notebooks for RAG and evals - -### Deploy and Monitoring of Agents -Improved production readiness -- Added usage metrics collection for chat completions -- CLI improvements for provider information -- Improved error handling and system reliability -- Better model endpoint handling and accessibility -- Improved signal handling on distro server - -### Better Engineering -Infrastructure and code quality improvements -- Faster text-based chat completion tests -- Improved testing for non-streaming agent apis -- Standardized import formatting with ruff linter -- Added conventional commits standard -- Fixed documentation parsing issues - +## v0.1.3 Release + +Here are some key changes that are coming as part of this release. + +### Build and Test Agents +Streamlined the initial development experience +- Added support for llama stack run --image-type venv +- Enhanced vector store options with new sqlite-vec provider and improved Qdrant integration +- vLLM improvements for tool calling and logprobs +- Better handling of sporadic code_interpreter tool calls + +### Agent Evals +Better benchmarking and Agent performance assessment +- Renamed eval API /eval-task to /benchmarks +- Improved documentation and notebooks for RAG and evals + +### Deploy and Monitoring of Agents +Improved production readiness +- Added usage metrics collection for chat completions +- CLI improvements for provider information +- Improved error handling and system reliability +- Better model endpoint handling and accessibility +- Improved signal handling on distro server + +### Better Engineering +Infrastructure and code quality improvements +- Faster text-based chat completion tests +- Improved testing for non-streaming agent apis +- Standardized import formatting with ruff linter +- Added conventional commits standard +- Fixed documentation parsing issues + --- # v0.1.2 Published on: 2025-02-07T22:06:49Z -# TL;DR -- Several stabilizations to development flows after the switch to `uv` -- Migrated CI workflows to new OSS repo - [llama-stack-ops](https://github.com/meta-llama/llama-stack-ops) -- Added automated rebuilds for ReadTheDocs -- Llama Stack server supports HTTPS -- Added system prompt overrides support -- Several bug fixes and improvements to documentation (check out Kubernetes deployment guide by @terrytangyuan ) - +# TL;DR +- Several stabilizations to development flows after the switch to `uv` +- Migrated CI workflows to new OSS repo - [llama-stack-ops](https://github.com/meta-llama/llama-stack-ops) +- Added automated rebuilds for ReadTheDocs +- Llama Stack server supports HTTPS +- Added system prompt overrides support +- Several bug fixes and improvements to documentation (check out Kubernetes deployment guide by @terrytangyuan ) + --- # v0.1.1 Published on: 2025-02-02T02:29:24Z -A bunch of small / big improvements everywhere including support for Windows, switching to `uv` and many provider improvements. - +A bunch of small / big improvements everywhere including support for Windows, switching to `uv` and many provider improvements. 
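> The v0.1.2 and v0.1.1 notes above mention HTTPS support for the Llama Stack server and system prompt overrides. A minimal sketch of a chat completion exercising both with the Python client follows; the base URL and model alias are placeholders, and the call shape mirrors the client example that appears later in this README diff, so treat it as an illustrative sketch rather than canonical usage.

```python
from llama_stack_client import LlamaStackClient

# v0.1.2 added HTTPS support on the server side; the client only needs an https:// base URL.
client = LlamaStackClient(base_url="https://llama-stack.example.com")  # hypothetical endpoint

response = client.inference.chat_completion(
    model_id="meta-llama/Llama-3.2-3B-Instruct",  # assumed alias registered on the server
    messages=[
        # A custom system message is one way to use the system prompt override support noted above.
        {"role": "system", "content": "Answer in one short sentence."},
        {"role": "user", "content": "What does Llama Stack provide?"},
    ],
)
print(response.completion_message.content)
```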
+ --- # v0.1.0 Published on: 2025-01-24T17:47:47Z -We are excited to announce a stable API release of Llama Stack, which enables developers to build RAG applications and Agents using tools and safety shields, monitor and those agents with telemetry, and evaluate the agent with scoring functions. - -## Context -GenAI application developers need more than just an LLM - they need to integrate tools, connect with their data sources, establish guardrails, and ground the LLM responses effectively. Currently, developers must piece together various tools and APIs, complicating the development lifecycle and increasing costs. The result is that developers are spending more time on these integrations rather than focusing on the application logic itself. The bespoke coupling of components also makes it challenging to adopt state-of-the-art solutions in the rapidly evolving GenAI space. This is particularly difficult for open models like Llama, as best practices are not widely established in the open. - -Llama Stack was created to provide developers with a comprehensive and coherent interface that simplifies AI application development and codifies best practices across the Llama ecosystem. Since our launch in September 2024, we have seen a huge uptick in interest in Llama Stack APIs by both AI developers and from partners building AI services with Llama models. Partners like Nvidia, Fireworks, and Ollama have collaborated with us to develop implementations across various APIs, including inference, memory, and safety. - -With Llama Stack, you can easily build a RAG agent which can also search the web, do complex math, and custom tool calling. You can use telemetry to inspect those traces, and convert telemetry into evals datasets. And with Llama Stack’s plugin architecture and prepackage distributions, you choose to run your agent anywhere - in the cloud with our partners, deploy your own environment using virtualenv, conda, or Docker, operate locally with Ollama, or even run on mobile devices with our SDKs. Llama Stack offers unprecedented flexibility while also simplifying the developer experience. - -## Release -After iterating on the APIs for the last 3 months, today we’re launching a stable release (V1) of the Llama Stack APIs and the corresponding llama-stack server and client packages(v0.1.0). We now have automated tests for providers. These tests make sure that all provider implementations are verified. Developers can now easily and reliably select distributions or providers based on their specific requirements. - -There are example standalone apps in llama-stack-apps. 
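> The paragraph above describes building a RAG agent that can also search the web. A rough sketch of that flow with the Python SDK's agent helper is shown below; the model id, the `builtin::websearch` tool group name, and the exact `Agent` constructor have shifted across client versions, so treat all of them as assumptions rather than the definitive API.

```python
from llama_stack_client import LlamaStackClient
from llama_stack_client.lib.agents.agent import Agent  # import path differs across client versions

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local server

# An agent that may call the built-in web search tool (the provider behind it
# typically needs a search API key configured in the distribution).
agent = Agent(
    client,
    model="meta-llama/Llama-3.2-3B-Instruct",  # assumed model identifier
    instructions="You are a helpful assistant. Use web search for fresh facts.",
    tools=["builtin::websearch"],              # assumed tool group name
)

session_id = agent.create_session("release-notes-demo")
turn = agent.create_turn(
    session_id=session_id,
    messages=[{"role": "user", "content": "Summarize the Llama Stack v0.1.0 release."}],
    stream=False,
)
print(turn.output_message.content)
```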
- - -## Key Features of this release - -- **Unified API Layer** - - Inference: Run LLM models - - RAG: Store and retrieve knowledge for RAG - - Agents: Build multi-step agentic workflows - - Tools: Register tools that can be called by the agent - - Safety: Apply content filtering and safety policies - - Evaluation: Test model and agent quality - - Telemetry: Collect and analyze usage data and complex agentic traces - - Post Training ( Coming Soon ): Fine tune models for specific use cases - -- **Rich Provider Ecosystem** - - Local Development: Meta's Reference, Ollama - - Cloud: Fireworks, Together, Nvidia, AWS Bedrock, Groq, Cerebras - - On-premises: Nvidia NIM, vLLM, TGI, Dell-TGI - - On-device: iOS and Android support - -- **Built for Production** - - Pre-packaged distributions for common deployment scenarios - - Backwards compatibility across model versions - - Comprehensive evaluation capabilities - - Full observability and monitoring - -- **Multiple developer interfaces** - - CLI: Command line interface - - Python SDK - - Swift iOS SDK - - Kotlin Android SDK - -- **Sample llama stack applications** - - Python - - iOS - - Android - - +We are excited to announce a stable API release of Llama Stack, which enables developers to build RAG applications and Agents using tools and safety shields, monitor and those agents with telemetry, and evaluate the agent with scoring functions. + +## Context +GenAI application developers need more than just an LLM - they need to integrate tools, connect with their data sources, establish guardrails, and ground the LLM responses effectively. Currently, developers must piece together various tools and APIs, complicating the development lifecycle and increasing costs. The result is that developers are spending more time on these integrations rather than focusing on the application logic itself. The bespoke coupling of components also makes it challenging to adopt state-of-the-art solutions in the rapidly evolving GenAI space. This is particularly difficult for open models like Llama, as best practices are not widely established in the open. + +Llama Stack was created to provide developers with a comprehensive and coherent interface that simplifies AI application development and codifies best practices across the Llama ecosystem. Since our launch in September 2024, we have seen a huge uptick in interest in Llama Stack APIs by both AI developers and from partners building AI services with Llama models. Partners like Nvidia, Fireworks, and Ollama have collaborated with us to develop implementations across various APIs, including inference, memory, and safety. + +With Llama Stack, you can easily build a RAG agent which can also search the web, do complex math, and custom tool calling. You can use telemetry to inspect those traces, and convert telemetry into evals datasets. And with Llama Stack’s plugin architecture and prepackage distributions, you choose to run your agent anywhere - in the cloud with our partners, deploy your own environment using virtualenv, conda, or Docker, operate locally with Ollama, or even run on mobile devices with our SDKs. Llama Stack offers unprecedented flexibility while also simplifying the developer experience. + +## Release +After iterating on the APIs for the last 3 months, today we’re launching a stable release (V1) of the Llama Stack APIs and the corresponding llama-stack server and client packages(v0.1.0). We now have automated tests for providers. These tests make sure that all provider implementations are verified. 
Developers can now easily and reliably select distributions or providers based on their specific requirements. + +There are example standalone apps in llama-stack-apps. + + +## Key Features of this release + +- **Unified API Layer** + - Inference: Run LLM models + - RAG: Store and retrieve knowledge for RAG + - Agents: Build multi-step agentic workflows + - Tools: Register tools that can be called by the agent + - Safety: Apply content filtering and safety policies + - Evaluation: Test model and agent quality + - Telemetry: Collect and analyze usage data and complex agentic traces + - Post Training ( Coming Soon ): Fine tune models for specific use cases + +- **Rich Provider Ecosystem** + - Local Development: Meta's Reference, Ollama + - Cloud: Fireworks, Together, Nvidia, AWS Bedrock, Groq, Cerebras + - On-premises: Nvidia NIM, vLLM, TGI, Dell-TGI + - On-device: iOS and Android support + +- **Built for Production** + - Pre-packaged distributions for common deployment scenarios + - Backwards compatibility across model versions + - Comprehensive evaluation capabilities + - Full observability and monitoring + +- **Multiple developer interfaces** + - CLI: Command line interface + - Python SDK + - Swift iOS SDK + - Kotlin Android SDK + +- **Sample llama stack applications** + - Python + - iOS + - Android + + --- @@ -337,8 +407,8 @@ Published on: 2025-01-22T22:24:01Z # v0.0.63 Published on: 2024-12-18T07:17:43Z -A small but important bug-fix release to update the URL datatype for the client-SDKs. The issue affected multimodal agentic turns especially. - +A small but important bug-fix release to update the URL datatype for the client-SDKs. The issue affected multimodal agentic turns especially. + **Full Changelog**: https://github.com/meta-llama/llama-stack/compare/v0.0.62...v0.0.63 --- @@ -374,39 +444,39 @@ Published on: 2024-11-22T00:36:09Z # v0.0.53 Published on: 2024-11-20T22:18:00Z -🚀 Initial Release Notes for Llama Stack! - -### Added -- Resource-oriented design for models, shields, memory banks, datasets and eval tasks -- Persistence for registered objects with distribution -- Ability to persist memory banks created for FAISS -- PostgreSQL KVStore implementation -- Environment variable placeholder support in run.yaml files -- Comprehensive Zero-to-Hero notebooks and quickstart guides -- Support for quantized models in Ollama -- Vision models support for Together, Fireworks, Meta-Reference, and Ollama, and vLLM -- Bedrock distribution with safety shields support -- Evals API with task registration and scoring functions -- MMLU and SimpleQA benchmark scoring functions -- Huggingface dataset provider integration for benchmarks -- Support for custom dataset registration from local paths -- Benchmark evaluation CLI tools with visualization tables -- RAG evaluation scoring functions and metrics -- Local persistence for datasets and eval tasks - -### Changed -- Split safety into distinct providers (llama-guard, prompt-guard, code-scanner) -- Changed provider naming convention (`impls` → `inline`, `adapters` → `remote`) -- Updated API signatures for dataset and eval task registration -- Restructured folder organization for providers -- Enhanced Docker build configuration -- Added version prefixing for REST API routes -- Enhanced evaluation task registration workflow -- Improved benchmark evaluation output formatting -- Restructured evals folder organization for better modularity - -### Removed -- `llama stack configure` command - +🚀 Initial Release Notes for Llama Stack! 
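> The v0.1.0 feature list above includes a Safety API for content filtering, and the initial release notes below register shields as first-class resources. A hedged sketch of running a registered shield over a message before inference; the shield identifier and local endpoint are assumptions.

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local server

# Run a registered safety shield over the user input before handing it to the model.
result = client.safety.run_shield(
    shield_id="meta-llama/Llama-Guard-3-8B",  # assumed shield identifier
    messages=[{"role": "user", "content": "How do I bake a cake?"}],
    params={},
)
if result.violation:
    print(f"Blocked: {result.violation.user_message}")
else:
    print("Message passed the shield")
```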
+ +### Added +- Resource-oriented design for models, shields, memory banks, datasets and eval tasks +- Persistence for registered objects with distribution +- Ability to persist memory banks created for FAISS +- PostgreSQL KVStore implementation +- Environment variable placeholder support in run.yaml files +- Comprehensive Zero-to-Hero notebooks and quickstart guides +- Support for quantized models in Ollama +- Vision models support for Together, Fireworks, Meta-Reference, and Ollama, and vLLM +- Bedrock distribution with safety shields support +- Evals API with task registration and scoring functions +- MMLU and SimpleQA benchmark scoring functions +- Huggingface dataset provider integration for benchmarks +- Support for custom dataset registration from local paths +- Benchmark evaluation CLI tools with visualization tables +- RAG evaluation scoring functions and metrics +- Local persistence for datasets and eval tasks + +### Changed +- Split safety into distinct providers (llama-guard, prompt-guard, code-scanner) +- Changed provider naming convention (`impls` → `inline`, `adapters` → `remote`) +- Updated API signatures for dataset and eval task registration +- Restructured folder organization for providers +- Enhanced Docker build configuration +- Added version prefixing for REST API routes +- Enhanced evaluation task registration workflow +- Improved benchmark evaluation output formatting +- Restructured evals folder organization for better modularity + +### Removed +- `llama stack configure` command + --- diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 5828250d0..10e3f6cee 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -110,25 +110,9 @@ uv run pre-commit run --all-files > [!CAUTION] > Before pushing your changes, make sure that the pre-commit hooks have passed successfully. -## Running unit tests +## Running tests -You can run the unit tests by running: - -```bash -source .venv/bin/activate -./scripts/unit-tests.sh -``` - -If you'd like to run for a non-default version of Python (currently 3.10), pass `PYTHON_VERSION` variable as follows: - -``` -source .venv/bin/activate -PYTHON_VERSION=3.13 ./scripts/unit-tests.sh -``` - -## Running integration tests - -You can run integration tests following the instructions [here](tests/integration/README.md). +You can find the Llama Stack testing documentation here [here](tests/README.md). ## Adding a new dependency to the project @@ -141,11 +125,20 @@ uv sync ## Coding Style -* Comments should provide meaningful insights into the code. Avoid filler comments that simply describe the next step, as they create unnecessary clutter, same goes for docstrings. -* Prefer comments to clarify surprising behavior and/or relationships between parts of the code rather than explain what the next line of code does. -* Catching exceptions, prefer using a specific exception type rather than a broad catch-all like `Exception`. +* Comments should provide meaningful insights into the code. Avoid filler comments that simply + describe the next step, as they create unnecessary clutter, same goes for docstrings. +* Prefer comments to clarify surprising behavior and/or relationships between parts of the code + rather than explain what the next line of code does. +* Catching exceptions, prefer using a specific exception type rather than a broad catch-all like + `Exception`. * Error messages should be prefixed with "Failed to ..." 
-* 4 spaces for indentation rather than tabs +* 4 spaces for indentation rather than tab +* When using `# noqa` to suppress a style or linter warning, include a comment explaining the + justification for bypassing the check. +* When using `# type: ignore` to suppress a mypy warning, include a comment explaining the + justification for bypassing the check. +* Don't use unicode characters in the codebase. ASCII-only is preferred for compatibility or + readability reasons. ## Common Tasks @@ -174,14 +167,11 @@ If you have made changes to a provider's configuration in any form (introducing If you are making changes to the documentation at [https://llama-stack.readthedocs.io/en/latest/](https://llama-stack.readthedocs.io/en/latest/), you can use the following command to build the documentation and preview your changes. You will need [Sphinx](https://www.sphinx-doc.org/en/master/) and the readthedocs theme. ```bash -cd docs -uv sync --extra docs - # This rebuilds the documentation pages. -uv run make html +uv run --group docs make -C docs/ html # This will start a local server (usually at http://127.0.0.1:8000) that automatically rebuilds and refreshes when you make changes to the documentation. -uv run sphinx-autobuild source build/html --write-all +uv run --group docs sphinx-autobuild docs/source docs/build/html --write-all ``` ### Update API Documentation @@ -189,7 +179,7 @@ uv run sphinx-autobuild source build/html --write-all If you modify or add new API endpoints, update the API documentation accordingly. You can do this by running the following command: ```bash -uv run --with ".[dev]" ./docs/openapi_generator/run_openapi_generator.sh +uv run ./docs/openapi_generator/run_openapi_generator.sh ``` The generated API documentation will be available in `docs/_static/`. Make sure to review the changes before committing. diff --git a/MANIFEST.in b/MANIFEST.in index 879a9cbd4..88bd11767 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,5 +1,4 @@ include pyproject.toml -include llama_stack/templates/dependencies.json include llama_stack/models/llama/llama3/tokenizer.model include llama_stack/models/llama/llama4/tokenizer.model include llama_stack/distribution/*.sh diff --git a/README.md b/README.md index 617e5117b..37f1aa0f3 100644 --- a/README.md +++ b/README.md @@ -7,17 +7,18 @@ [![Unit Tests](https://github.com/meta-llama/llama-stack/actions/workflows/unit-tests.yml/badge.svg?branch=main)](https://github.com/meta-llama/llama-stack/actions/workflows/unit-tests.yml?query=branch%3Amain) [![Integration Tests](https://github.com/meta-llama/llama-stack/actions/workflows/integration-tests.yml/badge.svg?branch=main)](https://github.com/meta-llama/llama-stack/actions/workflows/integration-tests.yml?query=branch%3Amain) -[**Quick Start**](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html) | [**Documentation**](https://llama-stack.readthedocs.io/en/latest/index.html) | [**Colab Notebook**](./docs/getting_started.ipynb) - +[**Quick Start**](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html) | [**Documentation**](https://llama-stack.readthedocs.io/en/latest/index.html) | [**Colab Notebook**](./docs/getting_started.ipynb) | [**Discord**](https://discord.gg/llama-stack) ### ✨🎉 Llama 4 Support 🎉✨ We released [Version 0.2.0](https://github.com/meta-llama/llama-stack/releases/tag/v0.2.0) with support for the Llama 4 herd of models released by Meta. -You can now run Llama 4 models on Llama Stack. +
+👋 Click here to see how to run Llama 4 models on Llama Stack + +\ *Note you need 8xH100 GPU-host to run these models* - ```bash pip install -U llama_stack @@ -67,6 +68,16 @@ print(f"Assistant> {response.completion_message.content}") As more providers start supporting Llama 4, you can use them in Llama Stack as well. We are adding to the list. Stay tuned! +
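> As a quick sanity check after standing up a distribution, you can ask the running server which models it has registered, for example to confirm a Llama 4 model is available. A minimal sketch with the Python client, assuming the default local endpoint:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed default local endpoint

# Print every model the running distribution has registered.
for model in client.models.list():
    print(model.identifier)
```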
+ +### 🚀 One-Line Installer 🚀 + +To try Llama Stack locally, run: + +```bash +curl -LsSf https://github.com/meta-llama/llama-stack/raw/main/install.sh | sh +``` + ### Overview Llama Stack standardizes the core building blocks that simplify AI application development. It codifies best practices across the Llama ecosystem. More specifically, it provides @@ -96,25 +107,29 @@ By reducing friction and complexity, Llama Stack empowers developers to focus on ### API Providers Here is a list of the various API providers and available distributions that can help developers get started easily with Llama Stack. -| **API Provider Builder** | **Environments** | **Agents** | **Inference** | **Memory** | **Safety** | **Telemetry** | -|:------------------------:|:----------------------:|:----------:|:-------------:|:----------:|:----------:|:-------------:| -| Meta Reference | Single Node | ✅ | ✅ | ✅ | ✅ | ✅ | -| SambaNova | Hosted | | ✅ | | | | -| Cerebras | Hosted | | ✅ | | | | -| Fireworks | Hosted | ✅ | ✅ | ✅ | | | -| AWS Bedrock | Hosted | | ✅ | | ✅ | | -| Together | Hosted | ✅ | ✅ | | ✅ | | -| Groq | Hosted | | ✅ | | | | -| Ollama | Single Node | | ✅ | | | | -| TGI | Hosted and Single Node | | ✅ | | | | -| NVIDIA NIM | Hosted and Single Node | | ✅ | | | | -| Chroma | Single Node | | | ✅ | | | -| PG Vector | Single Node | | | ✅ | | | -| PyTorch ExecuTorch | On-device iOS | ✅ | ✅ | | | | -| vLLM | Hosted and Single Node | | ✅ | | | | -| OpenAI | Hosted | | ✅ | | | | -| Anthropic | Hosted | | ✅ | | | | -| Gemini | Hosted | | ✅ | | | | +| **API Provider Builder** | **Environments** | **Agents** | **Inference** | **Memory** | **Safety** | **Telemetry** | **Post Training** | +|:------------------------:|:----------------------:|:----------:|:-------------:|:----------:|:----------:|:-------------:|:-----------------:| +| Meta Reference | Single Node | ✅ | ✅ | ✅ | ✅ | ✅ | | +| SambaNova | Hosted | | ✅ | | ✅ | | | +| Cerebras | Hosted | | ✅ | | | | | +| Fireworks | Hosted | ✅ | ✅ | ✅ | | | | +| AWS Bedrock | Hosted | | ✅ | | ✅ | | | +| Together | Hosted | ✅ | ✅ | | ✅ | | | +| Groq | Hosted | | ✅ | | | | | +| Ollama | Single Node | | ✅ | | | | | +| TGI | Hosted and Single Node | | ✅ | | | | | +| NVIDIA NIM | Hosted and Single Node | | ✅ | | | | | +| Chroma | Single Node | | | ✅ | | | | +| PG Vector | Single Node | | | ✅ | | | | +| PyTorch ExecuTorch | On-device iOS | ✅ | ✅ | | | | | +| vLLM | Hosted and Single Node | | ✅ | | | | | +| OpenAI | Hosted | | ✅ | | | | | +| Anthropic | Hosted | | ✅ | | | | | +| Gemini | Hosted | | ✅ | | | | | +| watsonx | Hosted | | ✅ | | | | | +| HuggingFace | Single Node | | | | | | ✅ | +| TorchTune | Single Node | | | | | | ✅ | +| NVIDIA NEMO | Hosted | | | | | | ✅ | ### Distributions @@ -124,7 +139,6 @@ A Llama Stack Distribution (or "distro") is a pre-configured bundle of provider | **Distribution** | **Llama Stack Docker** | Start This Distribution | |:---------------------------------------------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------:| | Meta Reference | [llamastack/distribution-meta-reference-gpu](https://hub.docker.com/repository/docker/llamastack/distribution-meta-reference-gpu/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/meta-reference-gpu.html) | -| Meta Reference Quantized | 
[llamastack/distribution-meta-reference-quantized-gpu](https://hub.docker.com/repository/docker/llamastack/distribution-meta-reference-quantized-gpu/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/meta-reference-quantized-gpu.html) | | SambaNova | [llamastack/distribution-sambanova](https://hub.docker.com/repository/docker/llamastack/distribution-sambanova/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/sambanova.html) | | Cerebras | [llamastack/distribution-cerebras](https://hub.docker.com/repository/docker/llamastack/distribution-cerebras/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/cerebras.html) | | Ollama | [llamastack/distribution-ollama](https://hub.docker.com/repository/docker/llamastack/distribution-ollama/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/ollama.html) | diff --git a/docs/_static/css/my_theme.css b/docs/_static/css/my_theme.css index a587f866d..d078ec057 100644 --- a/docs/_static/css/my_theme.css +++ b/docs/_static/css/my_theme.css @@ -27,3 +27,9 @@ pre { white-space: pre-wrap !important; word-break: break-all; } + +[data-theme="dark"] .mermaid { + background-color: #f4f4f6 !important; + border-radius: 6px; + padding: 0.5em; + } diff --git a/docs/_static/js/detect_theme.js b/docs/_static/js/detect_theme.js index 484b2bb8b..712565ef7 100644 --- a/docs/_static/js/detect_theme.js +++ b/docs/_static/js/detect_theme.js @@ -1,9 +1,32 @@ document.addEventListener("DOMContentLoaded", function () { const prefersDark = window.matchMedia("(prefers-color-scheme: dark)").matches; const htmlElement = document.documentElement; - if (prefersDark) { - htmlElement.setAttribute("data-theme", "dark"); + + // Check if theme is saved in localStorage + const savedTheme = localStorage.getItem("sphinx-rtd-theme"); + + if (savedTheme) { + // Use the saved theme preference + htmlElement.setAttribute("data-theme", savedTheme); + document.body.classList.toggle("dark", savedTheme === "dark"); } else { - htmlElement.setAttribute("data-theme", "light"); + // Fall back to system preference + const theme = prefersDark ? 
"dark" : "light"; + htmlElement.setAttribute("data-theme", theme); + document.body.classList.toggle("dark", theme === "dark"); + // Save initial preference + localStorage.setItem("sphinx-rtd-theme", theme); } + + // Listen for theme changes from the existing toggle + const observer = new MutationObserver(function(mutations) { + mutations.forEach(function(mutation) { + if (mutation.attributeName === "data-theme") { + const currentTheme = htmlElement.getAttribute("data-theme"); + localStorage.setItem("sphinx-rtd-theme", currentTheme); + } + }); + }); + + observer.observe(htmlElement, { attributes: true }); }); diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index 542fb5be5..d88462909 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -62,11 +62,12 @@ "tags": [ "DatasetIO" ], - "description": "", + "description": "Append rows to a dataset.", "parameters": [ { "name": "dataset_id", "in": "path", + "description": "The ID of the dataset to append the rows to.", "required": true, "schema": { "type": "string" @@ -89,7 +90,7 @@ "post": { "responses": { "200": { - "description": "OK", + "description": "A BatchChatCompletionResponse with the full completions.", "content": { "application/json": { "schema": { @@ -114,7 +115,7 @@ "tags": [ "Inference" ], - "description": "", + "description": "Generate chat completions for a batch of messages using the specified model.", "parameters": [], "requestBody": { "content": { @@ -132,7 +133,7 @@ "post": { "responses": { "200": { - "description": "OK", + "description": "A BatchCompletionResponse with the full completions.", "content": { "application/json": { "schema": { @@ -157,7 +158,7 @@ "tags": [ "Inference" ], - "description": "", + "description": "Generate completions for a batch of content using the specified model.", "parameters": [], "requestBody": { "content": { @@ -193,7 +194,7 @@ "tags": [ "PostTraining (Coming Soon)" ], - "description": "", + "description": "Cancel a training job.", "parameters": [], "requestBody": { "content": { @@ -211,7 +212,7 @@ "post": { "responses": { "200": { - "description": "If stream=False, returns a ChatCompletionResponse with the full completion. If stream=True, returns an SSE event stream of ChatCompletionResponseStreamChunk", + "description": "If stream=False, returns a ChatCompletionResponse with the full completion. If stream=True, returns an SSE event stream of ChatCompletionResponseStreamChunk.", "content": { "application/json": { "schema": { @@ -259,7 +260,7 @@ "post": { "responses": { "200": { - "description": "If stream=False, returns a CompletionResponse with the full completion. If stream=True, returns an SSE event stream of CompletionResponseStreamChunk", + "description": "If stream=False, returns a CompletionResponse with the full completion. 
If stream=True, returns an SSE event stream of CompletionResponseStreamChunk.", "content": { "application/json": { "schema": { @@ -307,11 +308,11 @@ "get": { "responses": { "200": { - "description": "A ListAgentsResponse.", + "description": "A PaginatedResponse.", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/ListAgentsResponse" + "$ref": "#/components/schemas/PaginatedResponse" } } } @@ -333,7 +334,26 @@ "Agents" ], "description": "List all agents.", - "parameters": [] + "parameters": [ + { + "name": "start_index", + "in": "query", + "description": "The index to start the pagination from.", + "required": false, + "schema": { + "type": "integer" + } + }, + { + "name": "limit", + "in": "query", + "description": "The number of agents to return.", + "required": false, + "schema": { + "type": "integer" + } + } + ] }, "post": { "responses": { @@ -434,7 +454,7 @@ "post": { "responses": { "200": { - "description": "If stream=False, returns a Turn object. If stream=True, returns an SSE event stream of AgentTurnResponseStreamChunk", + "description": "If stream=False, returns a Turn object. If stream=True, returns an SSE event stream of AgentTurnResponseStreamChunk.", "content": { "application/json": { "schema": { @@ -497,11 +517,127 @@ } } }, + "/v1/openai/v1/responses": { + "get": { + "responses": { + "200": { + "description": "A ListOpenAIResponseObject.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ListOpenAIResponseObject" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Agents" + ], + "description": "List all OpenAI responses.", + "parameters": [ + { + "name": "after", + "in": "query", + "description": "The ID of the last response to return.", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "limit", + "in": "query", + "description": "The number of responses to return.", + "required": false, + "schema": { + "type": "integer" + } + }, + { + "name": "model", + "in": "query", + "description": "The model to filter responses by.", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "order", + "in": "query", + "description": "The order to sort responses by when sorted by created_at ('asc' or 'desc').", + "required": false, + "schema": { + "$ref": "#/components/schemas/Order" + } + } + ] + }, + "post": { + "responses": { + "200": { + "description": "An OpenAIResponseObject.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/OpenAIResponseObject" + } + }, + "text/event-stream": { + "schema": { + "$ref": "#/components/schemas/OpenAIResponseObjectStream" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Agents" + ], + "description": "Create a new OpenAI response.", + "parameters": [], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreateOpenaiResponseRequest" + } + } + }, + "required": true + } + } + }, "/v1/files": { "get": { "responses": { "200": { - "description": 
"OK", + "description": "A ListBucketResponse.", "content": { "application/json": { "schema": { @@ -531,6 +667,7 @@ { "name": "bucket", "in": "query", + "description": "Bucket name (valid chars: a-zA-Z0-9_-).", "required": true, "schema": { "type": "string" @@ -541,7 +678,7 @@ "post": { "responses": { "200": { - "description": "OK", + "description": "A FileUploadResponse.", "content": { "application/json": { "schema": { @@ -643,7 +780,7 @@ "tags": [ "Agents" ], - "description": "Delete an agent by its ID.", + "description": "Delete an agent by its ID and its associated sessions and turns.", "parameters": [ { "name": "agent_id", @@ -661,7 +798,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A Session.", "content": { "application/json": { "schema": { @@ -741,7 +878,7 @@ "tags": [ "Agents" ], - "description": "Delete an agent session by its ID.", + "description": "Delete an agent session by its ID and its associated turns.", "parameters": [ { "name": "session_id", @@ -768,7 +905,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A FileResponse.", "content": { "application/json": { "schema": { @@ -798,7 +935,7 @@ { "name": "bucket", "in": "path", - "description": "Bucket name (valid chars: a-zA-Z0-9_-)", + "description": "Bucket name (valid chars: a-zA-Z0-9_-).", "required": true, "schema": { "type": "string" @@ -807,7 +944,7 @@ { "name": "key", "in": "path", - "description": "Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)", + "description": "Key under which the file is stored (valid chars: a-zA-Z0-9_-/.).", "required": true, "schema": { "type": "string" @@ -841,7 +978,7 @@ { "name": "bucket", "in": "path", - "description": "Bucket name (valid chars: a-zA-Z0-9_-)", + "description": "Bucket name (valid chars: a-zA-Z0-9_-).", "required": true, "schema": { "type": "string" @@ -850,7 +987,7 @@ { "name": "key", "in": "path", - "description": "Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)", + "description": "Key under which the file is stored (valid chars: a-zA-Z0-9_-/.).", "required": true, "schema": { "type": "string" @@ -863,7 +1000,7 @@ "post": { "responses": { "200": { - "description": "An array of embeddings, one for each content. Each embedding is a list of floats. The dimensionality of the embedding is model-specific; you can check model metadata using /models/{model_id}", + "description": "An array of embeddings, one for each content. Each embedding is a list of floats. 
The dimensionality of the embedding is model-specific; you can check model metadata using /models/{model_id}.", "content": { "application/json": { "schema": { @@ -906,7 +1043,7 @@ "post": { "responses": { "200": { - "description": "EvaluateResponse object containing generations and scores", + "description": "EvaluateResponse object containing generations and scores.", "content": { "application/json": { "schema": { @@ -1090,7 +1227,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A Benchmark.", "content": { "application/json": { "schema": { @@ -1115,11 +1252,55 @@ "tags": [ "Benchmarks" ], - "description": "", + "description": "Get a benchmark by its ID.", "parameters": [ { "name": "benchmark_id", "in": "path", + "description": "The ID of the benchmark to get.", + "required": true, + "schema": { + "type": "string" + } + } + ] + } + }, + "/v1/openai/v1/chat/completions/{completion_id}": { + "get": { + "responses": { + "200": { + "description": "A OpenAICompletionWithInputMessages.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/OpenAICompletionWithInputMessages" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Inference" + ], + "description": "Describe a chat completion by its ID.", + "parameters": [ + { + "name": "completion_id", + "in": "path", + "description": "ID of the chat completion.", "required": true, "schema": { "type": "string" @@ -1132,7 +1313,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A Dataset.", "content": { "application/json": { "schema": { @@ -1157,11 +1338,12 @@ "tags": [ "Datasets" ], - "description": "", + "description": "Get a dataset by its ID.", "parameters": [ { "name": "dataset_id", "in": "path", + "description": "The ID of the dataset to get.", "required": true, "schema": { "type": "string" @@ -1190,11 +1372,12 @@ "tags": [ "Datasets" ], - "description": "", + "description": "Unregister a dataset by its ID.", "parameters": [ { "name": "dataset_id", "in": "path", + "description": "The ID of the dataset to unregister.", "required": true, "schema": { "type": "string" @@ -1207,7 +1390,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A Model.", "content": { "application/json": { "schema": { @@ -1232,11 +1415,12 @@ "tags": [ "Models" ], - "description": "", + "description": "Get a model by its identifier.", "parameters": [ { "name": "model_id", "in": "path", + "description": "The identifier of the model to get.", "required": true, "schema": { "type": "string" @@ -1265,11 +1449,55 @@ "tags": [ "Models" ], - "description": "", + "description": "Unregister a model.", "parameters": [ { "name": "model_id", "in": "path", + "description": "The identifier of the model to unregister.", + "required": true, + "schema": { + "type": "string" + } + } + ] + } + }, + "/v1/openai/v1/responses/{response_id}": { + "get": { + "responses": { + "200": { + "description": "An OpenAIResponseObject.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/OpenAIResponseObject" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": 
"#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Agents" + ], + "description": "Retrieve an OpenAI response by its ID.", + "parameters": [ + { + "name": "response_id", + "in": "path", + "description": "The ID of the OpenAI response to retrieve.", "required": true, "schema": { "type": "string" @@ -1282,7 +1510,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A ScoringFn.", "content": { "application/json": { "schema": { @@ -1307,11 +1535,12 @@ "tags": [ "ScoringFunctions" ], - "description": "", + "description": "Get a scoring function by its ID.", "parameters": [ { "name": "scoring_fn_id", "in": "path", + "description": "The ID of the scoring function to get.", "required": true, "schema": { "type": "string" @@ -1324,7 +1553,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A Shield.", "content": { "application/json": { "schema": { @@ -1349,11 +1578,12 @@ "tags": [ "Shields" ], - "description": "", + "description": "Get a shield by its identifier.", "parameters": [ { "name": "identifier", "in": "path", + "description": "The identifier of the shield to get.", "required": true, "schema": { "type": "string" @@ -1366,7 +1596,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A Span.", "content": { "application/json": { "schema": { @@ -1391,11 +1621,12 @@ "tags": [ "Telemetry" ], - "description": "", + "description": "Get a span by its ID.", "parameters": [ { "name": "trace_id", "in": "path", + "description": "The ID of the trace to get the span from.", "required": true, "schema": { "type": "string" @@ -1404,6 +1635,7 @@ { "name": "span_id", "in": "path", + "description": "The ID of the span to get.", "required": true, "schema": { "type": "string" @@ -1416,7 +1648,7 @@ "post": { "responses": { "200": { - "description": "OK", + "description": "A QuerySpanTreeResponse.", "content": { "application/json": { "schema": { @@ -1441,11 +1673,12 @@ "tags": [ "Telemetry" ], - "description": "", + "description": "Get a span tree by its ID.", "parameters": [ { "name": "span_id", "in": "path", + "description": "The ID of the span to get the tree from.", "required": true, "schema": { "type": "string" @@ -1468,7 +1701,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A Tool.", "content": { "application/json": { "schema": { @@ -1493,11 +1726,12 @@ "tags": [ "ToolGroups" ], - "description": "", + "description": "Get a tool by its name.", "parameters": [ { "name": "tool_name", "in": "path", + "description": "The name of the tool to get.", "required": true, "schema": { "type": "string" @@ -1510,7 +1744,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A ToolGroup.", "content": { "application/json": { "schema": { @@ -1535,11 +1769,12 @@ "tags": [ "ToolGroups" ], - "description": "", + "description": "Get a tool group by its ID.", "parameters": [ { "name": "toolgroup_id", "in": "path", + "description": "The ID of the tool group to get.", "required": true, "schema": { "type": "string" @@ -1568,11 +1803,12 @@ "tags": [ "ToolGroups" ], - "description": "Unregister a tool group", + "description": "Unregister a tool group.", "parameters": [ { "name": "toolgroup_id", "in": "path", + "description": "The ID of the tool group to unregister.", "required": true, "schema": { "type": "string" @@ -1585,7 +1821,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A Trace.", 
"content": { "application/json": { "schema": { @@ -1610,11 +1846,12 @@ "tags": [ "Telemetry" ], - "description": "", + "description": "Get a trace by its ID.", "parameters": [ { "name": "trace_id", "in": "path", + "description": "The ID of the trace to get.", "required": true, "schema": { "type": "string" @@ -1627,7 +1864,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A PostTrainingJobArtifactsResponse.", "content": { "application/json": { "schema": { @@ -1652,11 +1889,12 @@ "tags": [ "PostTraining (Coming Soon)" ], - "description": "", + "description": "Get the artifacts of a training job.", "parameters": [ { "name": "job_uuid", "in": "query", + "description": "The UUID of the job to get the artifacts of.", "required": true, "schema": { "type": "string" @@ -1669,7 +1907,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A PostTrainingJobStatusResponse.", "content": { "application/json": { "schema": { @@ -1694,11 +1932,12 @@ "tags": [ "PostTraining (Coming Soon)" ], - "description": "", + "description": "Get the status of a training job.", "parameters": [ { "name": "job_uuid", "in": "query", + "description": "The UUID of the job to get the status of.", "required": true, "schema": { "type": "string" @@ -1711,7 +1950,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A ListPostTrainingJobsResponse.", "content": { "application/json": { "schema": { @@ -1736,7 +1975,7 @@ "tags": [ "PostTraining (Coming Soon)" ], - "description": "", + "description": "Get all training jobs.", "parameters": [] } }, @@ -1744,7 +1983,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A FileUploadResponse.", "content": { "application/json": { "schema": { @@ -1769,12 +2008,12 @@ "tags": [ "Files" ], - "description": "Returns information about an existsing upload session", + "description": "Returns information about an existsing upload session.", "parameters": [ { "name": "upload_id", "in": "path", - "description": "ID of the upload session", + "description": "ID of the upload session.", "required": true, "schema": { "type": "string" @@ -1785,7 +2024,7 @@ "post": { "responses": { "200": { - "description": "OK", + "description": "A FileResponse or None if the upload is not complete.", "content": { "application/json": { "schema": { @@ -1822,7 +2061,7 @@ { "name": "upload_id", "in": "path", - "description": "ID of the upload session", + "description": "ID of the upload session.", "required": true, "schema": { "type": "string" @@ -1846,7 +2085,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A VectorDB.", "content": { "application/json": { "schema": { @@ -1871,11 +2110,12 @@ "tags": [ "VectorDBs" ], - "description": "", + "description": "Get a vector database by its identifier.", "parameters": [ { "name": "vector_db_id", "in": "path", + "description": "The identifier of the vector database to get.", "required": true, "schema": { "type": "string" @@ -1904,11 +2144,12 @@ "tags": [ "VectorDBs" ], - "description": "", + "description": "Unregister a vector database.", "parameters": [ { "name": "vector_db_id", "in": "path", + "description": "The identifier of the vector database to unregister.", "required": true, "schema": { "type": "string" @@ -1921,7 +2162,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A HealthInfo.", "content": { "application/json": { "schema": { @@ -1946,7 +2187,7 @@ "tags": [ "Inspect" ], - "description": "", + "description": "Get 
the health of the service.", "parameters": [] } }, @@ -2008,7 +2249,7 @@ "tags": [ "VectorIO" ], - "description": "", + "description": "Insert chunks into a vector database.", "parameters": [], "requestBody": { "content": { @@ -2026,7 +2267,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A ProviderInfo object containing the provider's details.", "content": { "application/json": { "schema": { @@ -2051,11 +2292,12 @@ "tags": [ "Providers" ], - "description": "", + "description": "Get detailed information about a specific provider.", "parameters": [ { "name": "provider_id", "in": "path", + "description": "The ID of the provider to inspect.", "required": true, "schema": { "type": "string" @@ -2068,7 +2310,7 @@ "post": { "responses": { "200": { - "description": "OK", + "description": "A ToolInvocationResult.", "content": { "application/json": { "schema": { @@ -2093,7 +2335,7 @@ "tags": [ "ToolRuntime" ], - "description": "Run a tool with the given arguments", + "description": "Run a tool with the given arguments.", "parameters": [], "requestBody": { "content": { @@ -2111,7 +2353,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A PaginatedResponse.", "content": { "application/json": { "schema": { @@ -2136,7 +2378,7 @@ "tags": [ "DatasetIO" ], - "description": "Get a paginated list of rows from a dataset.\nUses offset-based pagination where:\n- start_index: The starting index (0-based). If None, starts from beginning.\n- limit: Number of items to return. If None or -1, returns all items.\n\nThe response includes:\n- data: List of items for the current page\n- has_more: Whether there are more items available after this set", + "description": "Get a paginated list of rows from a dataset.\nUses offset-based pagination where:\n- start_index: The starting index (0-based). If None, starts from beginning.\n- limit: Number of items to return. 
If None or -1, returns all items.\n\nThe response includes:\n- data: List of items for the current page.\n- has_more: Whether there are more items available after this set.", "parameters": [ { "name": "dataset_id", @@ -2172,7 +2414,7 @@ "get": { "responses": { "200": { - "description": "The status of the evaluationjob.", + "description": "The status of the evaluation job.", "content": { "application/json": { "schema": { @@ -2319,11 +2561,11 @@ "get": { "responses": { "200": { - "description": "A ListAgentSessionsResponse.", + "description": "A PaginatedResponse.", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/ListAgentSessionsResponse" + "$ref": "#/components/schemas/PaginatedResponse" } } } @@ -2354,6 +2596,24 @@ "schema": { "type": "string" } + }, + { + "name": "start_index", + "in": "query", + "description": "The index to start the pagination from.", + "required": false, + "schema": { + "type": "integer" + } + }, + { + "name": "limit", + "in": "query", + "description": "The number of sessions to return.", + "required": false, + "schema": { + "type": "integer" + } } ] } @@ -2362,7 +2622,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A ListBenchmarksResponse.", "content": { "application/json": { "schema": { @@ -2387,7 +2647,7 @@ "tags": [ "Benchmarks" ], - "description": "", + "description": "List all benchmarks.", "parameters": [] }, "post": { @@ -2411,7 +2671,7 @@ "tags": [ "Benchmarks" ], - "description": "", + "description": "Register a benchmark.", "parameters": [], "requestBody": { "content": { @@ -2425,11 +2685,129 @@ } } }, + "/v1/openai/v1/chat/completions": { + "get": { + "responses": { + "200": { + "description": "A ListOpenAIChatCompletionResponse.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ListOpenAIChatCompletionResponse" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Inference" + ], + "description": "List all chat completions.", + "parameters": [ + { + "name": "after", + "in": "query", + "description": "The ID of the last chat completion to return.", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "limit", + "in": "query", + "description": "The maximum number of chat completions to return.", + "required": false, + "schema": { + "type": "integer" + } + }, + { + "name": "model", + "in": "query", + "description": "The model to filter by.", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "order", + "in": "query", + "description": "The order to sort the chat completions by: \"asc\" or \"desc\". 
Defaults to \"desc\".", + "required": false, + "schema": { + "$ref": "#/components/schemas/Order" + } + } + ] + }, + "post": { + "responses": { + "200": { + "description": "An OpenAIChatCompletion.", + "content": { + "application/json": { + "schema": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIChatCompletion" + }, + { + "$ref": "#/components/schemas/OpenAIChatCompletionChunk" + } + ] + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Inference" + ], + "description": "Generate an OpenAI-compatible chat completion for the given messages using the specified model.", + "parameters": [], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/OpenaiChatCompletionRequest" + } + } + }, + "required": true + } + } + }, "/v1/datasets": { "get": { "responses": { "200": { - "description": "OK", + "description": "A ListDatasetsResponse.", "content": { "application/json": { "schema": { @@ -2454,13 +2832,13 @@ "tags": [ "Datasets" ], - "description": "", + "description": "List all datasets.", "parameters": [] }, "post": { "responses": { "200": { - "description": "OK", + "description": "A Dataset.", "content": { "application/json": { "schema": { @@ -2503,7 +2881,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A ListFileResponse.", "content": { "application/json": { "schema": { @@ -2533,7 +2911,7 @@ { "name": "bucket", "in": "path", - "description": "Bucket name (valid chars: a-zA-Z0-9_-)", + "description": "Bucket name (valid chars: a-zA-Z0-9_-).", "required": true, "schema": { "type": "string" @@ -2546,7 +2924,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A ListModelsResponse.", "content": { "application/json": { "schema": { @@ -2571,13 +2949,13 @@ "tags": [ "Models" ], - "description": "", + "description": "List all models.", "parameters": [] }, "post": { "responses": { "200": { - "description": "OK", + "description": "A Model.", "content": { "application/json": { "schema": { @@ -2602,7 +2980,7 @@ "tags": [ "Models" ], - "description": "", + "description": "Register a model.", "parameters": [], "requestBody": { "content": { @@ -2616,11 +2994,102 @@ } } }, + "/v1/openai/v1/responses/{response_id}/input_items": { + "get": { + "responses": { + "200": { + "description": "An ListOpenAIResponseInputItem.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ListOpenAIResponseInputItem" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Agents" + ], + "description": "List input items for a given OpenAI response.", + "parameters": [ + { + "name": "response_id", + "in": "path", + "description": "The ID of the response to retrieve input items for.", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "after", + "in": "query", + "description": "An item ID to list items after, used for pagination.", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "before", + "in": "query", + "description": "An 
item ID to list items before, used for pagination.", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "include", + "in": "query", + "description": "Additional fields to include in the response.", + "required": false, + "schema": { + "type": "array", + "items": { + "type": "string" + } + } + }, + { + "name": "limit", + "in": "query", + "description": "A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.", + "required": false, + "schema": { + "type": "integer" + } + }, + { + "name": "order", + "in": "query", + "description": "The order to return the input items in. Default is desc.", + "required": false, + "schema": { + "$ref": "#/components/schemas/Order" + } + } + ] + } + }, "/v1/providers": { "get": { "responses": { "200": { - "description": "OK", + "description": "A ListProvidersResponse containing information about all providers.", "content": { "application/json": { "schema": { @@ -2645,7 +3114,7 @@ "tags": [ "Providers" ], - "description": "", + "description": "List all available providers.", "parameters": [] } }, @@ -2653,7 +3122,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A ListRoutesResponse.", "content": { "application/json": { "schema": { @@ -2678,7 +3147,7 @@ "tags": [ "Inspect" ], - "description": "", + "description": "List all routes.", "parameters": [] } }, @@ -2686,7 +3155,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A ListToolDefsResponse.", "content": { "application/json": { "schema": { @@ -2711,11 +3180,12 @@ "tags": [ "ToolRuntime" ], - "description": "", + "description": "List all tools in the runtime.", "parameters": [ { "name": "tool_group_id", "in": "query", + "description": "The ID of the tool group to list tools for.", "required": false, "schema": { "type": "string" @@ -2724,6 +3194,7 @@ { "name": "mcp_endpoint", "in": "query", + "description": "The MCP endpoint to use for the tool group.", "required": false, "schema": { "$ref": "#/components/schemas/URL" @@ -2736,7 +3207,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A ListScoringFunctionsResponse.", "content": { "application/json": { "schema": { @@ -2761,7 +3232,7 @@ "tags": [ "ScoringFunctions" ], - "description": "", + "description": "List all scoring functions.", "parameters": [] }, "post": { @@ -2785,7 +3256,7 @@ "tags": [ "ScoringFunctions" ], - "description": "", + "description": "Register a scoring function.", "parameters": [], "requestBody": { "content": { @@ -2803,7 +3274,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A ListShieldsResponse.", "content": { "application/json": { "schema": { @@ -2828,13 +3299,13 @@ "tags": [ "Shields" ], - "description": "", + "description": "List all shields.", "parameters": [] }, "post": { "responses": { "200": { - "description": "OK", + "description": "A Shield.", "content": { "application/json": { "schema": { @@ -2859,7 +3330,7 @@ "tags": [ "Shields" ], - "description": "", + "description": "Register a shield.", "parameters": [], "requestBody": { "content": { @@ -2877,7 +3348,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A ListToolGroupsResponse.", "content": { "application/json": { "schema": { @@ -2902,7 +3373,7 @@ "tags": [ "ToolGroups" ], - "description": "List tool groups with optional provider", + "description": "List tool groups with optional provider.", "parameters": [] }, "post": { @@ -2926,7 +3397,7 @@ "tags": [ 
"ToolGroups" ], - "description": "Register a tool group", + "description": "Register a tool group.", "parameters": [], "requestBody": { "content": { @@ -2944,7 +3415,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A ListToolsResponse.", "content": { "application/json": { "schema": { @@ -2969,11 +3440,12 @@ "tags": [ "ToolGroups" ], - "description": "List tools with optional tool group", + "description": "List tools with optional tool group.", "parameters": [ { "name": "toolgroup_id", "in": "query", + "description": "The ID of the tool group to list tools for.", "required": false, "schema": { "type": "string" @@ -2986,7 +3458,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A ListVectorDBsResponse.", "content": { "application/json": { "schema": { @@ -3011,13 +3483,13 @@ "tags": [ "VectorDBs" ], - "description": "", + "description": "List all vector databases.", "parameters": [] }, "post": { "responses": { "200": { - "description": "OK", + "description": "A VectorDB.", "content": { "application/json": { "schema": { @@ -3042,7 +3514,7 @@ "tags": [ "VectorDBs" ], - "description": "", + "description": "Register a vector database.", "parameters": [], "requestBody": { "content": { @@ -3078,7 +3550,7 @@ "tags": [ "Telemetry" ], - "description": "", + "description": "Log an event.", "parameters": [], "requestBody": { "content": { @@ -3092,54 +3564,11 @@ } } }, - "/v1/openai/v1/chat/completions": { - "post": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/OpenAIChatCompletion" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Inference" - ], - "description": "Generate an OpenAI-compatible chat completion for the given messages using the specified model.", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/OpenaiChatCompletionRequest" - } - } - }, - "required": true - } - } - }, "/v1/openai/v1/completions": { "post": { "responses": { "200": { - "description": "OK", + "description": "An OpenAICompletion.", "content": { "application/json": { "schema": { @@ -3178,11 +3607,54 @@ } } }, + "/v1/openai/v1/embeddings": { + "post": { + "responses": { + "200": { + "description": "An OpenAIEmbeddingsResponse containing the embeddings.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/OpenAIEmbeddingsResponse" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Inference" + ], + "description": "Generate OpenAI-compatible embeddings for the given input using the specified model.", + "parameters": [], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/OpenaiEmbeddingsRequest" + } + } + }, + "required": true + } + } + }, "/v1/openai/v1/models": { "get": { "responses": { "200": { - "description": "OK", + "description": "A OpenAIListModelsResponse.", "content": { "application/json": { "schema": 
{ @@ -3207,7 +3679,7 @@ "tags": [ "Models" ], - "description": "", + "description": "List models using the OpenAI API.", "parameters": [] } }, @@ -3215,7 +3687,7 @@ "post": { "responses": { "200": { - "description": "OK", + "description": "A PostTrainingJob.", "content": { "application/json": { "schema": { @@ -3240,7 +3712,7 @@ "tags": [ "PostTraining (Coming Soon)" ], - "description": "", + "description": "Run preference optimization of a model.", "parameters": [], "requestBody": { "content": { @@ -3301,7 +3773,7 @@ "post": { "responses": { "200": { - "description": "OK", + "description": "A QueryChunksResponse.", "content": { "application/json": { "schema": { @@ -3326,7 +3798,7 @@ "tags": [ "VectorIO" ], - "description": "", + "description": "Query chunks from a vector database.", "parameters": [], "requestBody": { "content": { @@ -3340,11 +3812,64 @@ } } }, + "/v1/telemetry/metrics/{metric_name}": { + "post": { + "responses": { + "200": { + "description": "A QueryMetricsResponse.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/QueryMetricsResponse" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Telemetry" + ], + "description": "Query metrics.", + "parameters": [ + { + "name": "metric_name", + "in": "path", + "description": "The name of the metric to query.", + "required": true, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/QueryMetricsRequest" + } + } + }, + "required": true + } + } + }, "/v1/telemetry/spans": { "post": { "responses": { "200": { - "description": "OK", + "description": "A QuerySpansResponse.", "content": { "application/json": { "schema": { @@ -3369,7 +3894,7 @@ "tags": [ "Telemetry" ], - "description": "", + "description": "Query spans.", "parameters": [], "requestBody": { "content": { @@ -3387,7 +3912,7 @@ "post": { "responses": { "200": { - "description": "OK", + "description": "A QueryTracesResponse.", "content": { "application/json": { "schema": { @@ -3412,7 +3937,7 @@ "tags": [ "Telemetry" ], - "description": "", + "description": "Query traces.", "parameters": [], "requestBody": { "content": { @@ -3559,7 +4084,7 @@ "post": { "responses": { "200": { - "description": "OK", + "description": "A RunShieldResponse.", "content": { "application/json": { "schema": { @@ -3584,7 +4109,7 @@ "tags": [ "Safety" ], - "description": "", + "description": "Run a shield.", "parameters": [], "requestBody": { "content": { @@ -3620,7 +4145,7 @@ "tags": [ "Telemetry" ], - "description": "", + "description": "Save spans to a dataset.", "parameters": [], "requestBody": { "content": { @@ -3638,7 +4163,7 @@ "post": { "responses": { "200": { - "description": "ScoreResponse object containing rows and aggregated results", + "description": "A ScoreResponse object containing rows and aggregated results.", "content": { "application/json": { "schema": { @@ -3681,7 +4206,7 @@ "post": { "responses": { "200": { - "description": "OK", + "description": "A ScoreBatchResponse.", "content": { "application/json": { "schema": { @@ -3706,7 +4231,7 @@ "tags": [ "Scoring" ], - "description": "", + "description": "Score a batch of rows.", "parameters": [], "requestBody": { "content": { @@ -3724,7 +4249,7 @@ 
"post": { "responses": { "200": { - "description": "OK", + "description": "A PostTrainingJob.", "content": { "application/json": { "schema": { @@ -3749,7 +4274,7 @@ "tags": [ "PostTraining (Coming Soon)" ], - "description": "", + "description": "Run supervised fine-tuning of a model.", "parameters": [], "requestBody": { "content": { @@ -3810,7 +4335,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A VersionInfo.", "content": { "application/json": { "schema": { @@ -3835,7 +4360,7 @@ "tags": [ "Inspect" ], - "description": "", + "description": "Get the version of the service.", "parameters": [] } } @@ -3901,7 +4426,8 @@ } ] } - } + }, + "description": "The rows to append to the dataset." } }, "additionalProperties": false, @@ -3954,9 +4480,13 @@ "properties": { "type": { "type": "string", + "enum": [ + "json_schema", + "grammar" + ], + "description": "Must be \"grammar\" to identify this format type", "const": "grammar", - "default": "grammar", - "description": "Must be \"grammar\" to identify this format type" + "default": "grammar" }, "bnf": { "type": "object", @@ -4080,9 +4610,13 @@ "properties": { "type": { "type": "string", + "enum": [ + "json_schema", + "grammar" + ], + "description": "Must be \"json_schema\" to identify this format type", "const": "json_schema", - "default": "json_schema", - "description": "Must be \"json_schema\" to identify this format type" + "default": "json_schema" }, "json_schema": { "type": "object", @@ -4600,7 +5134,8 @@ "type": "object", "properties": { "model_id": { - "type": "string" + "type": "string", + "description": "The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint." }, "messages_batch": { "type": "array", @@ -4609,22 +5144,27 @@ "items": { "$ref": "#/components/schemas/Message" } - } + }, + "description": "The messages to generate completions for." }, "sampling_params": { - "$ref": "#/components/schemas/SamplingParams" + "$ref": "#/components/schemas/SamplingParams", + "description": "(Optional) Parameters to control the sampling strategy." }, "tools": { "type": "array", "items": { "$ref": "#/components/schemas/ToolDefinition" - } + }, + "description": "(Optional) List of tool definitions available to the model." }, "tool_config": { - "$ref": "#/components/schemas/ToolConfig" + "$ref": "#/components/schemas/ToolConfig", + "description": "(Optional) Configuration for tool use." }, "response_format": { - "$ref": "#/components/schemas/ResponseFormat" + "$ref": "#/components/schemas/ResponseFormat", + "description": "(Optional) Grammar specification for guided (structured) decoding." }, "logprobs": { "type": "object", @@ -4636,7 +5176,7 @@ } }, "additionalProperties": false, - "title": "LogProbConfig" + "description": "(Optional) If specified, log probabilities for each token position will be returned." } }, "additionalProperties": false, @@ -4739,19 +5279,23 @@ "type": "object", "properties": { "model_id": { - "type": "string" + "type": "string", + "description": "The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint." }, "content_batch": { "type": "array", "items": { "$ref": "#/components/schemas/InterleavedContent" - } + }, + "description": "The content to generate completions for." }, "sampling_params": { - "$ref": "#/components/schemas/SamplingParams" + "$ref": "#/components/schemas/SamplingParams", + "description": "(Optional) Parameters to control the sampling strategy." 
}, "response_format": { - "$ref": "#/components/schemas/ResponseFormat" + "$ref": "#/components/schemas/ResponseFormat", + "description": "(Optional) Grammar specification for guided (structured) decoding." }, "logprobs": { "type": "object", @@ -4763,7 +5307,7 @@ } }, "additionalProperties": false, - "title": "LogProbConfig" + "description": "(Optional) If specified, log probabilities for each token position will be returned." } }, "additionalProperties": false, @@ -4831,7 +5375,8 @@ "type": "object", "properties": { "job_uuid": { - "type": "string" + "type": "string", + "description": "The UUID of the job to cancel." } }, "additionalProperties": false, @@ -4852,18 +5397,18 @@ "items": { "$ref": "#/components/schemas/Message" }, - "description": "List of messages in the conversation" + "description": "List of messages in the conversation." }, "sampling_params": { "$ref": "#/components/schemas/SamplingParams", - "description": "Parameters to control the sampling strategy" + "description": "Parameters to control the sampling strategy." }, "tools": { "type": "array", "items": { "$ref": "#/components/schemas/ToolDefinition" }, - "description": "(Optional) List of tool definitions available to the model" + "description": "(Optional) List of tool definitions available to the model." }, "tool_choice": { "type": "string", @@ -5083,15 +5628,15 @@ }, "content": { "$ref": "#/components/schemas/InterleavedContent", - "description": "The content to generate a completion for" + "description": "The content to generate a completion for." }, "sampling_params": { "$ref": "#/components/schemas/SamplingParams", - "description": "(Optional) Parameters to control the sampling strategy" + "description": "(Optional) Parameters to control the sampling strategy." }, "response_format": { "$ref": "#/components/schemas/ResponseFormat", - "description": "(Optional) Grammar specification for guided (structured) decoding" + "description": "(Optional) Grammar specification for guided (structured) decoding." }, "stream": { "type": "boolean", @@ -5214,17 +5759,25 @@ "default": 10 }, "model": { - "type": "string" + "type": "string", + "description": "The model identifier to use for the agent" }, "instructions": { - "type": "string" + "type": "string", + "description": "The system instructions for the agent" + }, + "name": { + "type": "string", + "description": "Optional name for the agent, used in telemetry and identification" }, "enable_session_persistence": { "type": "boolean", - "default": false + "default": false, + "description": "Optional flag indicating whether session data has to be persisted" }, "response_format": { - "$ref": "#/components/schemas/ResponseFormat" + "$ref": "#/components/schemas/ResponseFormat", + "description": "Optional response format configuration" } }, "additionalProperties": false, @@ -5232,7 +5785,8 @@ "model", "instructions" ], - "title": "AgentConfig" + "title": "AgentConfig", + "description": "Configuration for an agent." 
}, "AgentTool": { "oneOf": [ @@ -5531,6 +6085,14 @@ }, "step_type": { "type": "string", + "enum": [ + "inference", + "tool_execution", + "shield_call", + "memory_retrieval" + ], + "title": "StepType", + "description": "Type of the step in an agent turn.", "const": "inference", "default": "inference" }, @@ -5572,6 +6134,14 @@ }, "step_type": { "type": "string", + "enum": [ + "inference", + "tool_execution", + "shield_call", + "memory_retrieval" + ], + "title": "StepType", + "description": "Type of the step in an agent turn.", "const": "memory_retrieval", "default": "memory_retrieval" }, @@ -5660,6 +6230,14 @@ }, "step_type": { "type": "string", + "enum": [ + "inference", + "tool_execution", + "shield_call", + "memory_retrieval" + ], + "title": "StepType", + "description": "Type of the step in an agent turn.", "const": "shield_call", "default": "shield_call" }, @@ -5700,6 +6278,14 @@ }, "step_type": { "type": "string", + "enum": [ + "inference", + "tool_execution", + "shield_call", + "memory_retrieval" + ], + "title": "StepType", + "description": "Type of the step in an agent turn.", "const": "tool_execution", "default": "tool_execution" }, @@ -5962,6 +6548,15 @@ "properties": { "event_type": { "type": "string", + "enum": [ + "step_start", + "step_complete", + "step_progress", + "turn_start", + "turn_complete", + "turn_awaiting_input" + ], + "title": "AgentTurnResponseEventType", "const": "step_complete", "default": "step_complete" }, @@ -6019,6 +6614,15 @@ "properties": { "event_type": { "type": "string", + "enum": [ + "step_start", + "step_complete", + "step_progress", + "turn_start", + "turn_complete", + "turn_awaiting_input" + ], + "title": "AgentTurnResponseEventType", "const": "step_progress", "default": "step_progress" }, @@ -6054,6 +6658,15 @@ "properties": { "event_type": { "type": "string", + "enum": [ + "step_start", + "step_complete", + "step_progress", + "turn_start", + "turn_complete", + "turn_awaiting_input" + ], + "title": "AgentTurnResponseEventType", "const": "step_start", "default": "step_start" }, @@ -6124,6 +6737,15 @@ "properties": { "event_type": { "type": "string", + "enum": [ + "step_start", + "step_complete", + "step_progress", + "turn_start", + "turn_complete", + "turn_awaiting_input" + ], + "title": "AgentTurnResponseEventType", "const": "turn_awaiting_input", "default": "turn_awaiting_input" }, @@ -6143,6 +6765,15 @@ "properties": { "event_type": { "type": "string", + "enum": [ + "step_start", + "step_complete", + "step_progress", + "turn_start", + "turn_complete", + "turn_awaiting_input" + ], + "title": "AgentTurnResponseEventType", "const": "turn_complete", "default": "turn_complete" }, @@ -6162,6 +6793,15 @@ "properties": { "event_type": { "type": "string", + "enum": [ + "step_start", + "step_complete", + "step_progress", + "turn_start", + "turn_complete", + "turn_awaiting_input" + ], + "title": "AgentTurnResponseEventType", "const": "turn_start", "default": "turn_start" }, @@ -6176,24 +6816,880 @@ ], "title": "AgentTurnResponseTurnStartPayload" }, + "OpenAIResponseInput": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall" + }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputFunctionToolCallOutput" + }, + { + "$ref": "#/components/schemas/OpenAIResponseMessage" + } + ] + }, + "OpenAIResponseInputFunctionToolCallOutput": { + "type": "object", + "properties": { + "call_id": { + "type": "string" + }, + "output": { + "type": 
"string" + }, + "type": { + "type": "string", + "const": "function_call_output", + "default": "function_call_output" + }, + "id": { + "type": "string" + }, + "status": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "call_id", + "output", + "type" + ], + "title": "OpenAIResponseInputFunctionToolCallOutput", + "description": "This represents the output of a function call that gets passed back to the model." + }, + "OpenAIResponseInputMessageContent": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseInputMessageContentText" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputMessageContentImage" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "input_text": "#/components/schemas/OpenAIResponseInputMessageContentText", + "input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage" + } + } + }, + "OpenAIResponseInputMessageContentImage": { + "type": "object", + "properties": { + "detail": { + "oneOf": [ + { + "type": "string", + "const": "low" + }, + { + "type": "string", + "const": "high" + }, + { + "type": "string", + "const": "auto" + } + ], + "default": "auto" + }, + "type": { + "type": "string", + "const": "input_image", + "default": "input_image" + }, + "image_url": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "detail", + "type" + ], + "title": "OpenAIResponseInputMessageContentImage" + }, + "OpenAIResponseInputMessageContentText": { + "type": "object", + "properties": { + "text": { + "type": "string" + }, + "type": { + "type": "string", + "const": "input_text", + "default": "input_text" + } + }, + "additionalProperties": false, + "required": [ + "text", + "type" + ], + "title": "OpenAIResponseInputMessageContentText" + }, + "OpenAIResponseInputTool": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseInputToolWebSearch" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolFileSearch" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolFunction" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolMCP" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "web_search": "#/components/schemas/OpenAIResponseInputToolWebSearch", + "file_search": "#/components/schemas/OpenAIResponseInputToolFileSearch", + "function": "#/components/schemas/OpenAIResponseInputToolFunction", + "mcp": "#/components/schemas/OpenAIResponseInputToolMCP" + } + } + }, + "OpenAIResponseInputToolFileSearch": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "file_search", + "default": "file_search" + }, + "vector_store_id": { + "type": "array", + "items": { + "type": "string" + } + }, + "ranking_options": { + "type": "object", + "properties": { + "ranker": { + "type": "string" + }, + "score_threshold": { + "type": "number", + "default": 0.0 + } + }, + "additionalProperties": false, + "title": "FileSearchRankingOptions" + } + }, + "additionalProperties": false, + "required": [ + "type", + "vector_store_id" + ], + "title": "OpenAIResponseInputToolFileSearch" + }, + "OpenAIResponseInputToolFunction": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "function", + "default": "function" + }, + "name": { + "type": "string" + }, + "description": { + "type": "string" + }, + "parameters": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + 
"type": "array" + }, + { + "type": "object" + } + ] + } + }, + "strict": { + "type": "boolean" + } + }, + "additionalProperties": false, + "required": [ + "type", + "name" + ], + "title": "OpenAIResponseInputToolFunction" + }, + "OpenAIResponseInputToolMCP": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "mcp", + "default": "mcp" + }, + "server_label": { + "type": "string" + }, + "server_url": { + "type": "string" + }, + "headers": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + }, + "require_approval": { + "oneOf": [ + { + "type": "string", + "const": "always" + }, + { + "type": "string", + "const": "never" + }, + { + "type": "object", + "properties": { + "always": { + "type": "array", + "items": { + "type": "string" + } + }, + "never": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "title": "ApprovalFilter" + } + ], + "default": "never" + }, + "allowed_tools": { + "oneOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "object", + "properties": { + "tool_names": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "title": "AllowedToolsFilter" + } + ] + } + }, + "additionalProperties": false, + "required": [ + "type", + "server_label", + "server_url", + "require_approval" + ], + "title": "OpenAIResponseInputToolMCP" + }, + "OpenAIResponseInputToolWebSearch": { + "type": "object", + "properties": { + "type": { + "oneOf": [ + { + "type": "string", + "const": "web_search" + }, + { + "type": "string", + "const": "web_search_preview_2025_03_11" + } + ], + "default": "web_search" + }, + "search_context_size": { + "type": "string", + "default": "medium" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "OpenAIResponseInputToolWebSearch" + }, + "OpenAIResponseMessage": { + "type": "object", + "properties": { + "content": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseInputMessageContent" + } + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageContent" + } + } + ] + }, + "role": { + "oneOf": [ + { + "type": "string", + "const": "system" + }, + { + "type": "string", + "const": "developer" + }, + { + "type": "string", + "const": "user" + }, + { + "type": "string", + "const": "assistant" + } + ] + }, + "type": { + "type": "string", + "const": "message", + "default": "message" + }, + "id": { + "type": "string" + }, + "status": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "content", + "role", + "type" + ], + "title": "OpenAIResponseMessage", + "description": "Corresponds to the various Message types in the Responses API. They are all under one type because the Responses API gives them all the same \"type\" value, and there is no way to tell them apart in certain scenarios." 
+ }, + "OpenAIResponseOutputMessageContent": { + "type": "object", + "properties": { + "text": { + "type": "string" + }, + "type": { + "type": "string", + "const": "output_text", + "default": "output_text" + } + }, + "additionalProperties": false, + "required": [ + "text", + "type" + ], + "title": "OpenAIResponseOutputMessageContentOutputText" + }, + "OpenAIResponseOutputMessageFunctionToolCall": { + "type": "object", + "properties": { + "call_id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "arguments": { + "type": "string" + }, + "type": { + "type": "string", + "const": "function_call", + "default": "function_call" + }, + "id": { + "type": "string" + }, + "status": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "call_id", + "name", + "arguments", + "type" + ], + "title": "OpenAIResponseOutputMessageFunctionToolCall" + }, + "OpenAIResponseOutputMessageWebSearchToolCall": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "status": { + "type": "string" + }, + "type": { + "type": "string", + "const": "web_search_call", + "default": "web_search_call" + } + }, + "additionalProperties": false, + "required": [ + "id", + "status", + "type" + ], + "title": "OpenAIResponseOutputMessageWebSearchToolCall" + }, + "CreateOpenaiResponseRequest": { + "type": "object", + "properties": { + "input": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseInput" + } + } + ], + "description": "Input message(s) to create the response." + }, + "model": { + "type": "string", + "description": "The underlying LLM used for completions." + }, + "instructions": { + "type": "string" + }, + "previous_response_id": { + "type": "string", + "description": "(Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses." 
+ }, + "store": { + "type": "boolean" + }, + "stream": { + "type": "boolean" + }, + "temperature": { + "type": "number" + }, + "tools": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseInputTool" + } + } + }, + "additionalProperties": false, + "required": [ + "input", + "model" + ], + "title": "CreateOpenaiResponseRequest" + }, + "OpenAIResponseError": { + "type": "object", + "properties": { + "code": { + "type": "string" + }, + "message": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "code", + "message" + ], + "title": "OpenAIResponseError" + }, + "OpenAIResponseObject": { + "type": "object", + "properties": { + "created_at": { + "type": "integer" + }, + "error": { + "$ref": "#/components/schemas/OpenAIResponseError" + }, + "id": { + "type": "string" + }, + "model": { + "type": "string" + }, + "object": { + "type": "string", + "const": "response", + "default": "response" + }, + "output": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseOutput" + } + }, + "parallel_tool_calls": { + "type": "boolean", + "default": false + }, + "previous_response_id": { + "type": "string" + }, + "status": { + "type": "string" + }, + "temperature": { + "type": "number" + }, + "top_p": { + "type": "number" + }, + "truncation": { + "type": "string" + }, + "user": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "created_at", + "id", + "model", + "object", + "output", + "parallel_tool_calls", + "status" + ], + "title": "OpenAIResponseObject" + }, + "OpenAIResponseOutput": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseMessage" + }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall" + }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall" + }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPCall" + }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "message": "#/components/schemas/OpenAIResponseMessage", + "web_search_call": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall", + "function_call": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall", + "mcp_call": "#/components/schemas/OpenAIResponseOutputMessageMCPCall", + "mcp_list_tools": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools" + } + } + }, + "OpenAIResponseOutputMessageMCPCall": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "type": { + "type": "string", + "const": "mcp_call", + "default": "mcp_call" + }, + "arguments": { + "type": "string" + }, + "name": { + "type": "string" + }, + "server_label": { + "type": "string" + }, + "error": { + "type": "string" + }, + "output": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "id", + "type", + "arguments", + "name", + "server_label" + ], + "title": "OpenAIResponseOutputMessageMCPCall" + }, + "OpenAIResponseOutputMessageMCPListTools": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "type": { + "type": "string", + "const": "mcp_list_tools", + "default": "mcp_list_tools" + }, + "server_label": { + "type": "string" + }, + "tools": { + "type": "array", + "items": { + "type": "object", + "properties": { + "input_schema": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + 
}, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + }, + "name": { + "type": "string" + }, + "description": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "input_schema", + "name" + ], + "title": "MCPListToolsTool" + } + } + }, + "additionalProperties": false, + "required": [ + "id", + "type", + "server_label", + "tools" + ], + "title": "OpenAIResponseOutputMessageMCPListTools" + }, + "OpenAIResponseObjectStream": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseCreated" + }, + { + "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta" + }, + { + "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseCompleted" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "response.created": "#/components/schemas/OpenAIResponseObjectStreamResponseCreated", + "response.output_text.delta": "#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta", + "response.completed": "#/components/schemas/OpenAIResponseObjectStreamResponseCompleted" + } + } + }, + "OpenAIResponseObjectStreamResponseCompleted": { + "type": "object", + "properties": { + "response": { + "$ref": "#/components/schemas/OpenAIResponseObject" + }, + "type": { + "type": "string", + "const": "response.completed", + "default": "response.completed" + } + }, + "additionalProperties": false, + "required": [ + "response", + "type" + ], + "title": "OpenAIResponseObjectStreamResponseCompleted" + }, + "OpenAIResponseObjectStreamResponseCreated": { + "type": "object", + "properties": { + "response": { + "$ref": "#/components/schemas/OpenAIResponseObject" + }, + "type": { + "type": "string", + "const": "response.created", + "default": "response.created" + } + }, + "additionalProperties": false, + "required": [ + "response", + "type" + ], + "title": "OpenAIResponseObjectStreamResponseCreated" + }, + "OpenAIResponseObjectStreamResponseOutputTextDelta": { + "type": "object", + "properties": { + "content_index": { + "type": "integer" + }, + "delta": { + "type": "string" + }, + "item_id": { + "type": "string" + }, + "output_index": { + "type": "integer" + }, + "sequence_number": { + "type": "integer" + }, + "type": { + "type": "string", + "const": "response.output_text.delta", + "default": "response.output_text.delta" + } + }, + "additionalProperties": false, + "required": [ + "content_index", + "delta", + "item_id", + "output_index", + "sequence_number", + "type" + ], + "title": "OpenAIResponseObjectStreamResponseOutputTextDelta" + }, "CreateUploadSessionRequest": { "type": "object", "properties": { "bucket": { "type": "string", - "description": "Bucket under which the file is stored (valid chars: a-zA-Z0-9_-)" + "description": "Bucket under which the file is stored (valid chars: a-zA-Z0-9_-)." }, "key": { "type": "string", - "description": "Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)" + "description": "Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)." }, "mime_type": { "type": "string", - "description": "MIME type of the file" + "description": "MIME type of the file." }, "size": { "type": "integer", - "description": "File size in bytes" + "description": "File size in bytes." 
} }, "additionalProperties": false, @@ -6345,7 +7841,7 @@ "type": "object", "properties": { "type": { - "type": "string", + "$ref": "#/components/schemas/ScoringFnParamsType", "const": "basic", "default": "basic" }, @@ -6358,7 +7854,8 @@ }, "additionalProperties": false, "required": [ - "type" + "type", + "aggregation_functions" ], "title": "BasicScoringFnParams" }, @@ -6410,7 +7907,7 @@ "type": "object", "properties": { "type": { - "type": "string", + "$ref": "#/components/schemas/ScoringFnParamsType", "const": "llm_as_judge", "default": "llm_as_judge" }, @@ -6436,7 +7933,9 @@ "additionalProperties": false, "required": [ "type", - "judge_model" + "judge_model", + "judge_score_regexes", + "aggregation_functions" ], "title": "LLMAsJudgeScoringFnParams" }, @@ -6474,7 +7973,7 @@ "type": "object", "properties": { "type": { - "type": "string", + "$ref": "#/components/schemas/ScoringFnParamsType", "const": "regex_parser", "default": "regex_parser" }, @@ -6493,7 +7992,9 @@ }, "additionalProperties": false, "required": [ - "type" + "type", + "parsing_regexes", + "aggregation_functions" ], "title": "RegexParserScoringFnParams" }, @@ -6518,6 +8019,15 @@ } } }, + "ScoringFnParamsType": { + "type": "string", + "enum": [ + "llm_as_judge", + "regex_parser", + "basic" + ], + "title": "ScoringFnParamsType" + }, "EvaluateRowsRequest": { "type": "object", "properties": { @@ -6786,6 +8296,17 @@ }, "type": { "type": "string", + "enum": [ + "model", + "shield", + "vector_db", + "dataset", + "scoring_function", + "benchmark", + "tool", + "tool_group" + ], + "title": "ResourceType", "const": "benchmark", "default": "benchmark" }, @@ -6827,7 +8348,6 @@ "additionalProperties": false, "required": [ "identifier", - "provider_resource_id", "provider_id", "type", "dataset_id", @@ -6836,6 +8356,482 @@ ], "title": "Benchmark" }, + "OpenAIAssistantMessageParam": { + "type": "object", + "properties": { + "role": { + "type": "string", + "const": "assistant", + "default": "assistant", + "description": "Must be \"assistant\" to identify this as the model's response" + }, + "content": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIChatCompletionContentPartParam" + } + } + ], + "description": "The content of the model's response" + }, + "name": { + "type": "string", + "description": "(Optional) The name of the assistant message participant." + }, + "tool_calls": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIChatCompletionToolCall" + }, + "description": "List of tool calls. Each tool call is an OpenAIChatCompletionToolCall object." + } + }, + "additionalProperties": false, + "required": [ + "role" + ], + "title": "OpenAIAssistantMessageParam", + "description": "A message containing the model's (assistant) response in an OpenAI-compatible chat completion request." 
+ }, + "OpenAIChatCompletionContentPartImageParam": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "image_url", + "default": "image_url" + }, + "image_url": { + "$ref": "#/components/schemas/OpenAIImageURL" + } + }, + "additionalProperties": false, + "required": [ + "type", + "image_url" + ], + "title": "OpenAIChatCompletionContentPartImageParam" + }, + "OpenAIChatCompletionContentPartParam": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIChatCompletionContentPartTextParam" + }, + { + "$ref": "#/components/schemas/OpenAIChatCompletionContentPartImageParam" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "text": "#/components/schemas/OpenAIChatCompletionContentPartTextParam", + "image_url": "#/components/schemas/OpenAIChatCompletionContentPartImageParam" + } + } + }, + "OpenAIChatCompletionContentPartTextParam": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "text", + "default": "text" + }, + "text": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "type", + "text" + ], + "title": "OpenAIChatCompletionContentPartTextParam" + }, + "OpenAIChatCompletionToolCall": { + "type": "object", + "properties": { + "index": { + "type": "integer" + }, + "id": { + "type": "string" + }, + "type": { + "type": "string", + "const": "function", + "default": "function" + }, + "function": { + "$ref": "#/components/schemas/OpenAIChatCompletionToolCallFunction" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "OpenAIChatCompletionToolCall" + }, + "OpenAIChatCompletionToolCallFunction": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "arguments": { + "type": "string" + } + }, + "additionalProperties": false, + "title": "OpenAIChatCompletionToolCallFunction" + }, + "OpenAIChoice": { + "type": "object", + "properties": { + "message": { + "$ref": "#/components/schemas/OpenAIMessageParam", + "description": "The message from the model" + }, + "finish_reason": { + "type": "string", + "description": "The reason the model stopped generating" + }, + "index": { + "type": "integer", + "description": "The index of the choice" + }, + "logprobs": { + "$ref": "#/components/schemas/OpenAIChoiceLogprobs", + "description": "(Optional) The log probabilities for the tokens in the message" + } + }, + "additionalProperties": false, + "required": [ + "message", + "finish_reason", + "index" + ], + "title": "OpenAIChoice", + "description": "A choice from an OpenAI-compatible chat completion response." + }, + "OpenAIChoiceLogprobs": { + "type": "object", + "properties": { + "content": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAITokenLogProb" + }, + "description": "(Optional) The log probabilities for the tokens in the message" + }, + "refusal": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAITokenLogProb" + }, + "description": "(Optional) The log probabilities for the tokens in the message" + } + }, + "additionalProperties": false, + "title": "OpenAIChoiceLogprobs", + "description": "The log probabilities for the tokens in the message from an OpenAI-compatible chat completion response." 
+ }, + "OpenAIDeveloperMessageParam": { + "type": "object", + "properties": { + "role": { + "type": "string", + "const": "developer", + "default": "developer", + "description": "Must be \"developer\" to identify this as a developer message" + }, + "content": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIChatCompletionContentPartParam" + } + } + ], + "description": "The content of the developer message" + }, + "name": { + "type": "string", + "description": "(Optional) The name of the developer message participant." + } + }, + "additionalProperties": false, + "required": [ + "role", + "content" + ], + "title": "OpenAIDeveloperMessageParam", + "description": "A message from the developer in an OpenAI-compatible chat completion request." + }, + "OpenAIImageURL": { + "type": "object", + "properties": { + "url": { + "type": "string" + }, + "detail": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "url" + ], + "title": "OpenAIImageURL" + }, + "OpenAIMessageParam": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIUserMessageParam" + }, + { + "$ref": "#/components/schemas/OpenAISystemMessageParam" + }, + { + "$ref": "#/components/schemas/OpenAIAssistantMessageParam" + }, + { + "$ref": "#/components/schemas/OpenAIToolMessageParam" + }, + { + "$ref": "#/components/schemas/OpenAIDeveloperMessageParam" + } + ], + "discriminator": { + "propertyName": "role", + "mapping": { + "user": "#/components/schemas/OpenAIUserMessageParam", + "system": "#/components/schemas/OpenAISystemMessageParam", + "assistant": "#/components/schemas/OpenAIAssistantMessageParam", + "tool": "#/components/schemas/OpenAIToolMessageParam", + "developer": "#/components/schemas/OpenAIDeveloperMessageParam" + } + } + }, + "OpenAISystemMessageParam": { + "type": "object", + "properties": { + "role": { + "type": "string", + "const": "system", + "default": "system", + "description": "Must be \"system\" to identify this as a system message" + }, + "content": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIChatCompletionContentPartParam" + } + } + ], + "description": "The content of the \"system prompt\". If multiple system messages are provided, they are concatenated. The underlying Llama Stack code may also add other system messages (for example, for formatting tool definitions)." + }, + "name": { + "type": "string", + "description": "(Optional) The name of the system message participant." + } + }, + "additionalProperties": false, + "required": [ + "role", + "content" + ], + "title": "OpenAISystemMessageParam", + "description": "A system message providing instructions or context to the model." + }, + "OpenAITokenLogProb": { + "type": "object", + "properties": { + "token": { + "type": "string" + }, + "bytes": { + "type": "array", + "items": { + "type": "integer" + } + }, + "logprob": { + "type": "number" + }, + "top_logprobs": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAITopLogProb" + } + } + }, + "additionalProperties": false, + "required": [ + "token", + "logprob", + "top_logprobs" + ], + "title": "OpenAITokenLogProb", + "description": "The log probability for a token from an OpenAI-compatible chat completion response." 
+ }, + "OpenAIToolMessageParam": { + "type": "object", + "properties": { + "role": { + "type": "string", + "const": "tool", + "default": "tool", + "description": "Must be \"tool\" to identify this as a tool response" + }, + "tool_call_id": { + "type": "string", + "description": "Unique identifier for the tool call this response is for" + }, + "content": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIChatCompletionContentPartParam" + } + } + ], + "description": "The response content from the tool" + } + }, + "additionalProperties": false, + "required": [ + "role", + "tool_call_id", + "content" + ], + "title": "OpenAIToolMessageParam", + "description": "A message representing the result of a tool invocation in an OpenAI-compatible chat completion request." + }, + "OpenAITopLogProb": { + "type": "object", + "properties": { + "token": { + "type": "string" + }, + "bytes": { + "type": "array", + "items": { + "type": "integer" + } + }, + "logprob": { + "type": "number" + } + }, + "additionalProperties": false, + "required": [ + "token", + "logprob" + ], + "title": "OpenAITopLogProb", + "description": "The top log probability for a token from an OpenAI-compatible chat completion response." + }, + "OpenAIUserMessageParam": { + "type": "object", + "properties": { + "role": { + "type": "string", + "const": "user", + "default": "user", + "description": "Must be \"user\" to identify this as a user message" + }, + "content": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIChatCompletionContentPartParam" + } + } + ], + "description": "The content of the message, which can include text and other media" + }, + "name": { + "type": "string", + "description": "(Optional) The name of the user message participant." + } + }, + "additionalProperties": false, + "required": [ + "role", + "content" + ], + "title": "OpenAIUserMessageParam", + "description": "A message from the user in an OpenAI-compatible chat completion request." 
+ }, + "OpenAICompletionWithInputMessages": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "The ID of the chat completion" + }, + "choices": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIChoice" + }, + "description": "List of choices" + }, + "object": { + "type": "string", + "const": "chat.completion", + "default": "chat.completion", + "description": "The object type, which will be \"chat.completion\"" + }, + "created": { + "type": "integer", + "description": "The Unix timestamp in seconds when the chat completion was created" + }, + "model": { + "type": "string", + "description": "The model that was used to generate the chat completion" + }, + "input_messages": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIMessageParam" + } + } + }, + "additionalProperties": false, + "required": [ + "id", + "choices", + "object", + "created", + "model", + "input_messages" + ], + "title": "OpenAICompletionWithInputMessages" + }, "DataSource": { "oneOf": [ { @@ -6867,6 +8863,17 @@ }, "type": { "type": "string", + "enum": [ + "model", + "shield", + "vector_db", + "dataset", + "scoring_function", + "benchmark", + "tool", + "tool_group" + ], + "title": "ResourceType", "const": "dataset", "default": "dataset" }, @@ -6912,7 +8919,6 @@ "additionalProperties": false, "required": [ "identifier", - "provider_resource_id", "provider_id", "type", "purpose", @@ -7042,6 +9048,17 @@ }, "type": { "type": "string", + "enum": [ + "model", + "shield", + "vector_db", + "dataset", + "scoring_function", + "benchmark", + "tool", + "tool_group" + ], + "title": "ResourceType", "const": "model", "default": "model" }, @@ -7078,7 +9095,6 @@ "additionalProperties": false, "required": [ "identifier", - "provider_resource_id", "provider_id", "type", "metadata", @@ -7277,6 +9293,17 @@ }, "type": { "type": "string", + "enum": [ + "model", + "shield", + "vector_db", + "dataset", + "scoring_function", + "benchmark", + "tool", + "tool_group" + ], + "title": "ResourceType", "const": "scoring_function", "default": "scoring_function" }, @@ -7318,7 +9345,6 @@ "additionalProperties": false, "required": [ "identifier", - "provider_resource_id", "provider_id", "type", "metadata", @@ -7370,6 +9396,17 @@ }, "type": { "type": "string", + "enum": [ + "model", + "shield", + "vector_db", + "dataset", + "scoring_function", + "benchmark", + "tool", + "tool_group" + ], + "title": "ResourceType", "const": "shield", "default": "shield" }, @@ -7402,7 +9439,6 @@ "additionalProperties": false, "required": [ "identifier", - "provider_resource_id", "provider_id", "type" ], @@ -7474,10 +9510,12 @@ "type": "array", "items": { "type": "string" - } + }, + "description": "The attributes to return in the tree." }, "max_depth": { - "type": "integer" + "type": "integer", + "description": "The maximum depth of the tree." 
} }, "additionalProperties": false, @@ -7582,15 +9620,23 @@ }, "type": { "type": "string", + "enum": [ + "model", + "shield", + "vector_db", + "dataset", + "scoring_function", + "benchmark", + "tool", + "tool_group" + ], + "title": "ResourceType", "const": "tool", "default": "tool" }, "toolgroup_id": { "type": "string" }, - "tool_host": { - "$ref": "#/components/schemas/ToolHost" - }, "description": { "type": "string" }, @@ -7629,25 +9675,14 @@ "additionalProperties": false, "required": [ "identifier", - "provider_resource_id", "provider_id", "type", "toolgroup_id", - "tool_host", "description", "parameters" ], "title": "Tool" }, - "ToolHost": { - "type": "string", - "enum": [ - "distribution", - "client", - "model_context_protocol" - ], - "title": "ToolHost" - }, "ToolGroup": { "type": "object", "properties": { @@ -7662,6 +9697,17 @@ }, "type": { "type": "string", + "enum": [ + "model", + "shield", + "vector_db", + "dataset", + "scoring_function", + "benchmark", + "tool", + "tool_group" + ], + "title": "ResourceType", "const": "tool_group", "default": "tool_group" }, @@ -7697,7 +9743,6 @@ "additionalProperties": false, "required": [ "identifier", - "provider_resource_id", "provider_id", "type" ], @@ -7864,6 +9909,17 @@ }, "type": { "type": "string", + "enum": [ + "model", + "shield", + "vector_db", + "dataset", + "scoring_function", + "benchmark", + "tool", + "tool_group" + ], + "title": "ResourceType", "const": "vector_db", "default": "vector_db" }, @@ -7877,7 +9933,6 @@ "additionalProperties": false, "required": [ "identifier", - "provider_resource_id", "provider_id", "type", "embedding_model", @@ -7889,7 +9944,13 @@ "type": "object", "properties": { "status": { - "type": "string" + "type": "string", + "enum": [ + "OK", + "Error", + "Not Implemented" + ], + "title": "HealthStatus" } }, "additionalProperties": false, @@ -7993,7 +10054,8 @@ "type": "object", "properties": { "vector_db_id": { - "type": "string" + "type": "string", + "description": "The identifier of the vector database to insert the chunks into." }, "chunks": { "type": "array", @@ -8001,7 +10063,8 @@ "type": "object", "properties": { "content": { - "$ref": "#/components/schemas/InterleavedContent" + "$ref": "#/components/schemas/InterleavedContent", + "description": "The content of the chunk, which can be interleaved text, images, or other types." }, "metadata": { "type": "object", @@ -8026,7 +10089,15 @@ "type": "object" } ] - } + }, + "description": "Metadata associated with the chunk, such as document ID, source, or other relevant information." + }, + "embedding": { + "type": "array", + "items": { + "type": "number" + }, + "description": "Optional embedding for the chunk. If not provided, it will be computed later." } }, "additionalProperties": false, @@ -8034,11 +10105,14 @@ "content", "metadata" ], - "title": "Chunk" - } + "title": "Chunk", + "description": "A chunk of content that can be inserted into a vector database." + }, + "description": "The chunks to insert. Each `Chunk` should contain content which can be interleaved text, images, or other types. `metadata`: `dict[str, Any]` and `embedding`: `List[float]` are optional. If `metadata` is provided, you configure how Llama Stack formats the chunk during generation. If `embedding` is not provided, it will be computed later." }, "ttl_seconds": { - "type": "integer" + "type": "integer", + "description": "The time to live of the chunks." 
} }, "additionalProperties": false, @@ -8084,6 +10158,31 @@ } ] } + }, + "health": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } } }, "additionalProperties": false, @@ -8091,7 +10190,8 @@ "api", "provider_id", "provider_type", - "config" + "config", + "health" ], "title": "ProviderInfo" }, @@ -8099,7 +10199,8 @@ "type": "object", "properties": { "tool_name": { - "type": "string" + "type": "string", + "description": "The name of the tool to invoke." }, "kwargs": { "type": "object", @@ -8124,7 +10225,8 @@ "type": "object" } ] - } + }, + "description": "A dictionary of arguments to pass to the tool." } }, "additionalProperties": false, @@ -8245,38 +10347,6 @@ ], "title": "Job" }, - "ListAgentSessionsResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Session" - } - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "ListAgentSessionsResponse" - }, - "ListAgentsResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Agent" - } - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "ListAgentsResponse" - }, "BucketResponse": { "type": "object", "properties": { @@ -8324,6 +10394,91 @@ ], "title": "ListBenchmarksResponse" }, + "Order": { + "type": "string", + "enum": [ + "asc", + "desc" + ], + "title": "Order" + }, + "ListOpenAIChatCompletionResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "The ID of the chat completion" + }, + "choices": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIChoice" + }, + "description": "List of choices" + }, + "object": { + "type": "string", + "const": "chat.completion", + "default": "chat.completion", + "description": "The object type, which will be \"chat.completion\"" + }, + "created": { + "type": "integer", + "description": "The Unix timestamp in seconds when the chat completion was created" + }, + "model": { + "type": "string", + "description": "The model that was used to generate the chat completion" + }, + "input_messages": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIMessageParam" + } + } + }, + "additionalProperties": false, + "required": [ + "id", + "choices", + "object", + "created", + "model", + "input_messages" + ], + "title": "OpenAICompletionWithInputMessages" + } + }, + "has_more": { + "type": "boolean" + }, + "first_id": { + "type": "string" + }, + "last_id": { + "type": "string" + }, + "object": { + "type": "string", + "const": "list", + "default": "list" + } + }, + "additionalProperties": false, + "required": [ + "data", + "has_more", + "first_id", + "last_id", + "object" + ], + "title": "ListOpenAIChatCompletionResponse" + }, "ListDatasetsResponse": { "type": "object", "properties": { @@ -8374,6 +10529,130 @@ ], "title": "ListModelsResponse" }, + "ListOpenAIResponseInputItem": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseInput" + } + }, + "object": { + "type": "string", + "const": "list", + "default": "list" + } + }, + "additionalProperties": false, + "required": [ + "data", + "object" + ], + 
"title": "ListOpenAIResponseInputItem" + }, + "ListOpenAIResponseObject": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseObjectWithInput" + } + }, + "has_more": { + "type": "boolean" + }, + "first_id": { + "type": "string" + }, + "last_id": { + "type": "string" + }, + "object": { + "type": "string", + "const": "list", + "default": "list" + } + }, + "additionalProperties": false, + "required": [ + "data", + "has_more", + "first_id", + "last_id", + "object" + ], + "title": "ListOpenAIResponseObject" + }, + "OpenAIResponseObjectWithInput": { + "type": "object", + "properties": { + "created_at": { + "type": "integer" + }, + "error": { + "$ref": "#/components/schemas/OpenAIResponseError" + }, + "id": { + "type": "string" + }, + "model": { + "type": "string" + }, + "object": { + "type": "string", + "const": "response", + "default": "response" + }, + "output": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseOutput" + } + }, + "parallel_tool_calls": { + "type": "boolean", + "default": false + }, + "previous_response_id": { + "type": "string" + }, + "status": { + "type": "string" + }, + "temperature": { + "type": "number" + }, + "top_p": { + "type": "number" + }, + "truncation": { + "type": "string" + }, + "user": { + "type": "string" + }, + "input": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseInput" + } + } + }, + "additionalProperties": false, + "required": [ + "created_at", + "id", + "model", + "object", + "output", + "parallel_tool_calls", + "status", + "input" + ], + "title": "OpenAIResponseObjectWithInput" + }, "ListProvidersResponse": { "type": "object", "properties": { @@ -8547,6 +10826,15 @@ } } }, + "EventType": { + "type": "string", + "enum": [ + "unstructured_log", + "structured_log", + "metric" + ], + "title": "EventType" + }, "LogSeverity": { "type": "string", "enum": [ @@ -8595,7 +10883,7 @@ } }, "type": { - "type": "string", + "$ref": "#/components/schemas/EventType", "const": "metric", "default": "metric" }, @@ -8632,7 +10920,7 @@ "type": "object", "properties": { "type": { - "type": "string", + "$ref": "#/components/schemas/StructuredLogType", "const": "span_end", "default": "span_end" }, @@ -8651,7 +10939,7 @@ "type": "object", "properties": { "type": { - "type": "string", + "$ref": "#/components/schemas/StructuredLogType", "const": "span_start", "default": "span_start" }, @@ -8705,7 +10993,7 @@ } }, "type": { - "type": "string", + "$ref": "#/components/schemas/EventType", "const": "structured_log", "default": "structured_log" }, @@ -8740,6 +11028,14 @@ } } }, + "StructuredLogType": { + "type": "string", + "enum": [ + "span_start", + "span_end" + ], + "title": "StructuredLogType" + }, "UnstructuredLogEvent": { "type": "object", "properties": { @@ -8776,7 +11072,7 @@ } }, "type": { - "type": "string", + "$ref": "#/components/schemas/EventType", "const": "unstructured_log", "default": "unstructured_log" }, @@ -8802,10 +11098,12 @@ "type": "object", "properties": { "event": { - "$ref": "#/components/schemas/Event" + "$ref": "#/components/schemas/Event", + "description": "The event to log." }, "ttl_seconds": { - "type": "integer" + "type": "integer", + "description": "The time to live of the event." 
} }, "additionalProperties": false, @@ -8815,172 +11113,119 @@ ], "title": "LogEventRequest" }, - "OpenAIAssistantMessageParam": { + "OpenAIJSONSchema": { "type": "object", "properties": { - "role": { - "type": "string", - "const": "assistant", - "default": "assistant", - "description": "Must be \"assistant\" to identify this as the model's response" - }, - "content": { - "$ref": "#/components/schemas/InterleavedContent", - "description": "The content of the model's response" - }, "name": { - "type": "string", - "description": "(Optional) The name of the assistant message participant." + "type": "string" }, - "tool_calls": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ToolCall" - }, - "description": "List of tool calls. Each tool call is a ToolCall object." + "description": { + "type": "string" + }, + "strict": { + "type": "boolean" + }, + "schema": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } } }, "additionalProperties": false, "required": [ - "role", - "content" + "name" ], - "title": "OpenAIAssistantMessageParam", - "description": "A message containing the model's (assistant) response in an OpenAI-compatible chat completion request." + "title": "OpenAIJSONSchema" }, - "OpenAIDeveloperMessageParam": { + "OpenAIResponseFormatJSONObject": { "type": "object", "properties": { - "role": { + "type": { "type": "string", - "const": "developer", - "default": "developer", - "description": "Must be \"developer\" to identify this as a developer message" - }, - "content": { - "$ref": "#/components/schemas/InterleavedContent", - "description": "The content of the developer message" - }, - "name": { - "type": "string", - "description": "(Optional) The name of the developer message participant." + "const": "json_object", + "default": "json_object" } }, "additionalProperties": false, "required": [ - "role", - "content" + "type" ], - "title": "OpenAIDeveloperMessageParam", - "description": "A message from the developer in an OpenAI-compatible chat completion request." 
+ "title": "OpenAIResponseFormatJSONObject" }, - "OpenAIMessageParam": { + "OpenAIResponseFormatJSONSchema": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "json_schema", + "default": "json_schema" + }, + "json_schema": { + "$ref": "#/components/schemas/OpenAIJSONSchema" + } + }, + "additionalProperties": false, + "required": [ + "type", + "json_schema" + ], + "title": "OpenAIResponseFormatJSONSchema" + }, + "OpenAIResponseFormatParam": { "oneOf": [ { - "$ref": "#/components/schemas/OpenAIUserMessageParam" + "$ref": "#/components/schemas/OpenAIResponseFormatText" }, { - "$ref": "#/components/schemas/OpenAISystemMessageParam" + "$ref": "#/components/schemas/OpenAIResponseFormatJSONSchema" }, { - "$ref": "#/components/schemas/OpenAIAssistantMessageParam" - }, - { - "$ref": "#/components/schemas/OpenAIToolMessageParam" - }, - { - "$ref": "#/components/schemas/OpenAIDeveloperMessageParam" + "$ref": "#/components/schemas/OpenAIResponseFormatJSONObject" } ], "discriminator": { - "propertyName": "role", + "propertyName": "type", "mapping": { - "user": "#/components/schemas/OpenAIUserMessageParam", - "system": "#/components/schemas/OpenAISystemMessageParam", - "assistant": "#/components/schemas/OpenAIAssistantMessageParam", - "tool": "#/components/schemas/OpenAIToolMessageParam", - "developer": "#/components/schemas/OpenAIDeveloperMessageParam" + "text": "#/components/schemas/OpenAIResponseFormatText", + "json_schema": "#/components/schemas/OpenAIResponseFormatJSONSchema", + "json_object": "#/components/schemas/OpenAIResponseFormatJSONObject" } } }, - "OpenAISystemMessageParam": { + "OpenAIResponseFormatText": { "type": "object", "properties": { - "role": { + "type": { "type": "string", - "const": "system", - "default": "system", - "description": "Must be \"system\" to identify this as a system message" - }, - "content": { - "$ref": "#/components/schemas/InterleavedContent", - "description": "The content of the \"system prompt\". If multiple system messages are provided, they are concatenated. The underlying Llama Stack code may also add other system messages (for example, for formatting tool definitions)." - }, - "name": { - "type": "string", - "description": "(Optional) The name of the system message participant." + "const": "text", + "default": "text" } }, "additionalProperties": false, "required": [ - "role", - "content" + "type" ], - "title": "OpenAISystemMessageParam", - "description": "A system message providing instructions or context to the model." - }, - "OpenAIToolMessageParam": { - "type": "object", - "properties": { - "role": { - "type": "string", - "const": "tool", - "default": "tool", - "description": "Must be \"tool\" to identify this as a tool response" - }, - "tool_call_id": { - "type": "string", - "description": "Unique identifier for the tool call this response is for" - }, - "content": { - "$ref": "#/components/schemas/InterleavedContent", - "description": "The response content from the tool" - } - }, - "additionalProperties": false, - "required": [ - "role", - "tool_call_id", - "content" - ], - "title": "OpenAIToolMessageParam", - "description": "A message representing the result of a tool invocation in an OpenAI-compatible chat completion request." 
- }, - "OpenAIUserMessageParam": { - "type": "object", - "properties": { - "role": { - "type": "string", - "const": "user", - "default": "user", - "description": "Must be \"user\" to identify this as a user message" - }, - "content": { - "$ref": "#/components/schemas/InterleavedContent", - "description": "The content of the message, which can include text and other media" - }, - "name": { - "type": "string", - "description": "(Optional) The name of the user message participant." - } - }, - "additionalProperties": false, - "required": [ - "role", - "content" - ], - "title": "OpenAIUserMessageParam", - "description": "A message from the user in an OpenAI-compatible chat completion request." + "title": "OpenAIResponseFormatText" }, "OpenaiChatCompletionRequest": { "type": "object", @@ -8994,11 +11239,11 @@ "items": { "$ref": "#/components/schemas/OpenAIMessageParam" }, - "description": "List of messages in the conversation" + "description": "List of messages in the conversation." }, "frequency_penalty": { "type": "number", - "description": "(Optional) The penalty for repeated tokens" + "description": "(Optional) The penalty for repeated tokens." }, "function_call": { "oneOf": [ @@ -9031,7 +11276,7 @@ } } ], - "description": "(Optional) The function call to use" + "description": "(Optional) The function call to use." }, "functions": { "type": "array", @@ -9060,49 +11305,46 @@ ] } }, - "description": "(Optional) List of functions to use" + "description": "(Optional) List of functions to use." }, "logit_bias": { "type": "object", "additionalProperties": { "type": "number" }, - "description": "(Optional) The logit bias to use" + "description": "(Optional) The logit bias to use." }, "logprobs": { "type": "boolean", - "description": "(Optional) The log probabilities to use" + "description": "(Optional) The log probabilities to use." }, "max_completion_tokens": { "type": "integer", - "description": "(Optional) The maximum number of tokens to generate" + "description": "(Optional) The maximum number of tokens to generate." }, "max_tokens": { "type": "integer", - "description": "(Optional) The maximum number of tokens to generate" + "description": "(Optional) The maximum number of tokens to generate." }, "n": { "type": "integer", - "description": "(Optional) The number of completions to generate" + "description": "(Optional) The number of completions to generate." }, "parallel_tool_calls": { "type": "boolean", - "description": "(Optional) Whether to parallelize tool calls" + "description": "(Optional) Whether to parallelize tool calls." }, "presence_penalty": { "type": "number", - "description": "(Optional) The penalty for repeated tokens" + "description": "(Optional) The penalty for repeated tokens." }, "response_format": { - "type": "object", - "additionalProperties": { - "type": "string" - }, - "description": "(Optional) The response format to use" + "$ref": "#/components/schemas/OpenAIResponseFormatParam", + "description": "(Optional) The response format to use." }, "seed": { "type": "integer", - "description": "(Optional) The seed to use" + "description": "(Optional) The seed to use." }, "stop": { "oneOf": [ @@ -9116,11 +11358,11 @@ } } ], - "description": "(Optional) The stop tokens to use" + "description": "(Optional) The stop tokens to use." }, "stream": { "type": "boolean", - "description": "(Optional) Whether to stream the response" + "description": "(Optional) Whether to stream the response." 
}, "stream_options": { "type": "object", @@ -9146,11 +11388,11 @@ } ] }, - "description": "(Optional) The stream options to use" + "description": "(Optional) The stream options to use." }, "temperature": { "type": "number", - "description": "(Optional) The temperature to use" + "description": "(Optional) The temperature to use." }, "tool_choice": { "oneOf": [ @@ -9183,7 +11425,7 @@ } } ], - "description": "(Optional) The tool choice to use" + "description": "(Optional) The tool choice to use." }, "tools": { "type": "array", @@ -9212,19 +11454,19 @@ ] } }, - "description": "(Optional) The tools to use" + "description": "(Optional) The tools to use." }, "top_logprobs": { "type": "integer", - "description": "(Optional) The top log probabilities to use" + "description": "(Optional) The top log probabilities to use." }, "top_p": { "type": "number", - "description": "(Optional) The top p to use" + "description": "(Optional) The top p to use." }, "user": { "type": "string", - "description": "(Optional) The user to use" + "description": "(Optional) The user to use." } }, "additionalProperties": false, @@ -9274,107 +11516,101 @@ "title": "OpenAIChatCompletion", "description": "Response from an OpenAI-compatible chat completion request." }, - "OpenAIChoice": { + "OpenAIChatCompletionChunk": { "type": "object", "properties": { - "message": { - "$ref": "#/components/schemas/OpenAIMessageParam", - "description": "The message from the model" + "id": { + "type": "string", + "description": "The ID of the chat completion" + }, + "choices": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIChunkChoice" + }, + "description": "List of choices" + }, + "object": { + "type": "string", + "const": "chat.completion.chunk", + "default": "chat.completion.chunk", + "description": "The object type, which will be \"chat.completion.chunk\"" + }, + "created": { + "type": "integer", + "description": "The Unix timestamp in seconds when the chat completion was created" + }, + "model": { + "type": "string", + "description": "The model that was used to generate the chat completion" + } + }, + "additionalProperties": false, + "required": [ + "id", + "choices", + "object", + "created", + "model" + ], + "title": "OpenAIChatCompletionChunk", + "description": "Chunk from a streaming response to an OpenAI-compatible chat completion request." + }, + "OpenAIChoiceDelta": { + "type": "object", + "properties": { + "content": { + "type": "string", + "description": "(Optional) The content of the delta" + }, + "refusal": { + "type": "string", + "description": "(Optional) The refusal of the delta" + }, + "role": { + "type": "string", + "description": "(Optional) The role of the delta" + }, + "tool_calls": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIChatCompletionToolCall" + }, + "description": "(Optional) The tool calls of the delta" + } + }, + "additionalProperties": false, + "title": "OpenAIChoiceDelta", + "description": "A delta from an OpenAI-compatible chat completion streaming response." 
+ }, + "OpenAIChunkChoice": { + "type": "object", + "properties": { + "delta": { + "$ref": "#/components/schemas/OpenAIChoiceDelta", + "description": "The delta from the chunk" }, "finish_reason": { "type": "string", "description": "The reason the model stopped generating" }, "index": { - "type": "integer" + "type": "integer", + "description": "The index of the choice" }, "logprobs": { - "$ref": "#/components/schemas/OpenAIChoiceLogprobs" + "$ref": "#/components/schemas/OpenAIChoiceLogprobs", + "description": "(Optional) The log probabilities for the tokens in the message" } }, "additionalProperties": false, "required": [ - "message", + "delta", "finish_reason", "index" ], - "title": "OpenAIChoice", - "description": "A choice from an OpenAI-compatible chat completion response." - }, - "OpenAIChoiceLogprobs": { - "type": "object", - "properties": { - "content": { - "type": "array", - "items": { - "$ref": "#/components/schemas/OpenAITokenLogProb" - } - }, - "refusal": { - "type": "array", - "items": { - "$ref": "#/components/schemas/OpenAITokenLogProb" - } - } - }, - "additionalProperties": false, - "title": "OpenAIChoiceLogprobs", - "description": "The log probabilities for the tokens in the message from an OpenAI-compatible chat completion response." - }, - "OpenAITokenLogProb": { - "type": "object", - "properties": { - "token": { - "type": "string" - }, - "bytes": { - "type": "array", - "items": { - "type": "integer" - } - }, - "logprob": { - "type": "number" - }, - "top_logprobs": { - "type": "array", - "items": { - "$ref": "#/components/schemas/OpenAITopLogProb" - } - } - }, - "additionalProperties": false, - "required": [ - "token", - "logprob", - "top_logprobs" - ], - "title": "OpenAITokenLogProb", - "description": "The log probability for a token from an OpenAI-compatible chat completion response." - }, - "OpenAITopLogProb": { - "type": "object", - "properties": { - "token": { - "type": "string" - }, - "bytes": { - "type": "array", - "items": { - "type": "integer" - } - }, - "logprob": { - "type": "number" - } - }, - "additionalProperties": false, - "required": [ - "token", - "logprob" - ], - "title": "OpenAITopLogProb", - "description": "The top log probability for a token from an OpenAI-compatible chat completion response." + "title": "OpenAIChunkChoice", + "description": "A chunk choice from an OpenAI-compatible chat completion streaming response." }, "OpenaiCompletionRequest": { "type": "object", @@ -9410,46 +11646,46 @@ } } ], - "description": "The prompt to generate a completion for" + "description": "The prompt to generate a completion for." }, "best_of": { "type": "integer", - "description": "(Optional) The number of completions to generate" + "description": "(Optional) The number of completions to generate." }, "echo": { "type": "boolean", - "description": "(Optional) Whether to echo the prompt" + "description": "(Optional) Whether to echo the prompt." }, "frequency_penalty": { "type": "number", - "description": "(Optional) The penalty for repeated tokens" + "description": "(Optional) The penalty for repeated tokens." }, "logit_bias": { "type": "object", "additionalProperties": { "type": "number" }, - "description": "(Optional) The logit bias to use" + "description": "(Optional) The logit bias to use." }, "logprobs": { "type": "boolean", - "description": "(Optional) The log probabilities to use" + "description": "(Optional) The log probabilities to use." 
}, "max_tokens": { "type": "integer", - "description": "(Optional) The maximum number of tokens to generate" + "description": "(Optional) The maximum number of tokens to generate." }, "n": { "type": "integer", - "description": "(Optional) The number of completions to generate" + "description": "(Optional) The number of completions to generate." }, "presence_penalty": { "type": "number", - "description": "(Optional) The penalty for repeated tokens" + "description": "(Optional) The penalty for repeated tokens." }, "seed": { "type": "integer", - "description": "(Optional) The seed to use" + "description": "(Optional) The seed to use." }, "stop": { "oneOf": [ @@ -9463,11 +11699,11 @@ } } ], - "description": "(Optional) The stop tokens to use" + "description": "(Optional) The stop tokens to use." }, "stream": { "type": "boolean", - "description": "(Optional) Whether to stream the response" + "description": "(Optional) Whether to stream the response." }, "stream_options": { "type": "object", @@ -9493,19 +11729,19 @@ } ] }, - "description": "(Optional) The stream options to use" + "description": "(Optional) The stream options to use." }, "temperature": { "type": "number", - "description": "(Optional) The temperature to use" + "description": "(Optional) The temperature to use." }, "top_p": { "type": "number", - "description": "(Optional) The top p to use" + "description": "(Optional) The top p to use." }, "user": { "type": "string", - "description": "(Optional) The user to use" + "description": "(Optional) The user to use." }, "guided_choice": { "type": "array", @@ -9584,6 +11820,139 @@ "title": "OpenAICompletionChoice", "description": "A choice from an OpenAI-compatible completion response." }, + "OpenaiEmbeddingsRequest": { + "type": "object", + "properties": { + "model": { + "type": "string", + "description": "The identifier of the model to use. The model must be an embedding model registered with Llama Stack and available via the /models endpoint." + }, + "input": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ], + "description": "Input text to embed, encoded as a string or array of strings. To embed multiple inputs in a single request, pass an array of strings." + }, + "encoding_format": { + "type": "string", + "description": "(Optional) The format to return the embeddings in. Can be either \"float\" or \"base64\". Defaults to \"float\"." + }, + "dimensions": { + "type": "integer", + "description": "(Optional) The number of dimensions the resulting output embeddings should have. Only supported in text-embedding-3 and later models." + }, + "user": { + "type": "string", + "description": "(Optional) A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse." 
+ } + }, + "additionalProperties": false, + "required": [ + "model", + "input" + ], + "title": "OpenaiEmbeddingsRequest" + }, + "OpenAIEmbeddingData": { + "type": "object", + "properties": { + "object": { + "type": "string", + "const": "embedding", + "default": "embedding", + "description": "The object type, which will be \"embedding\"" + }, + "embedding": { + "oneOf": [ + { + "type": "array", + "items": { + "type": "number" + } + }, + { + "type": "string" + } + ], + "description": "The embedding vector as a list of floats (when encoding_format=\"float\") or as a base64-encoded string (when encoding_format=\"base64\")" + }, + "index": { + "type": "integer", + "description": "The index of the embedding in the input list" + } + }, + "additionalProperties": false, + "required": [ + "object", + "embedding", + "index" + ], + "title": "OpenAIEmbeddingData", + "description": "A single embedding data object from an OpenAI-compatible embeddings response." + }, + "OpenAIEmbeddingUsage": { + "type": "object", + "properties": { + "prompt_tokens": { + "type": "integer", + "description": "The number of tokens in the input" + }, + "total_tokens": { + "type": "integer", + "description": "The total number of tokens used" + } + }, + "additionalProperties": false, + "required": [ + "prompt_tokens", + "total_tokens" + ], + "title": "OpenAIEmbeddingUsage", + "description": "Usage information for an OpenAI-compatible embeddings response." + }, + "OpenAIEmbeddingsResponse": { + "type": "object", + "properties": { + "object": { + "type": "string", + "const": "list", + "default": "list", + "description": "The object type, which will be \"list\"" + }, + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIEmbeddingData" + }, + "description": "List of embedding data objects" + }, + "model": { + "type": "string", + "description": "The model that was used to generate the embeddings" + }, + "usage": { + "$ref": "#/components/schemas/OpenAIEmbeddingUsage", + "description": "Usage information" + } + }, + "additionalProperties": false, + "required": [ + "object", + "data", + "model", + "usage" + ], + "title": "OpenAIEmbeddingsResponse", + "description": "Response from an OpenAI-compatible embeddings request." + }, "OpenAIModel": { "type": "object", "properties": { @@ -9798,16 +12167,20 @@ "type": "object", "properties": { "job_uuid": { - "type": "string" + "type": "string", + "description": "The UUID of the job to create." }, "finetuned_model": { - "type": "string" + "type": "string", + "description": "The model to fine-tune." }, "algorithm_config": { - "$ref": "#/components/schemas/DPOAlignmentConfig" + "$ref": "#/components/schemas/DPOAlignmentConfig", + "description": "The algorithm configuration." }, "training_config": { - "$ref": "#/components/schemas/TrainingConfig" + "$ref": "#/components/schemas/TrainingConfig", + "description": "The training configuration." }, "hyperparam_search_config": { "type": "object", @@ -9832,7 +12205,8 @@ "type": "object" } ] - } + }, + "description": "The hyperparam search configuration." }, "logger_config": { "type": "object", @@ -9857,7 +12231,8 @@ "type": "object" } ] - } + }, + "description": "The logger configuration." } }, "additionalProperties": false, @@ -9931,24 +12306,38 @@ "type": "object", "properties": { "query_generator_config": { - "$ref": "#/components/schemas/RAGQueryGeneratorConfig" + "$ref": "#/components/schemas/RAGQueryGeneratorConfig", + "description": "Configuration for the query generator." 
}, "max_tokens_in_context": { "type": "integer", - "default": 4096 + "default": 4096, + "description": "Maximum number of tokens in the context." }, "max_chunks": { "type": "integer", - "default": 5 + "default": 5, + "description": "Maximum number of chunks to retrieve." + }, + "chunk_template": { + "type": "string", + "default": "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n", + "description": "Template for formatting each retrieved chunk in the context. Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk content string), {metadata} (chunk metadata dict). Default: \"Result {index}\\nContent: {chunk.content}\\nMetadata: {metadata}\\n\"" + }, + "mode": { + "type": "string", + "description": "Search mode for retrieval—either \"vector\" or \"keyword\". Default \"vector\"." } }, "additionalProperties": false, "required": [ "query_generator_config", "max_tokens_in_context", - "max_chunks" + "max_chunks", + "chunk_template" ], - "title": "RAGQueryConfig" + "title": "RAGQueryConfig", + "description": "Configuration for the RAG query generation." }, "RAGQueryGeneratorConfig": { "oneOf": [ @@ -10032,10 +12421,12 @@ "type": "object", "properties": { "vector_db_id": { - "type": "string" + "type": "string", + "description": "The identifier of the vector database to query." }, "query": { - "$ref": "#/components/schemas/InterleavedContent" + "$ref": "#/components/schemas/InterleavedContent", + "description": "The query to search for." }, "params": { "type": "object", @@ -10060,7 +12451,8 @@ "type": "object" } ] - } + }, + "description": "The parameters of the query." } }, "additionalProperties": false, @@ -10079,7 +12471,8 @@ "type": "object", "properties": { "content": { - "$ref": "#/components/schemas/InterleavedContent" + "$ref": "#/components/schemas/InterleavedContent", + "description": "The content of the chunk, which can be interleaved text, images, or other types." }, "metadata": { "type": "object", @@ -10104,7 +12497,15 @@ "type": "object" } ] - } + }, + "description": "Metadata associated with the chunk, such as document ID, source, or other relevant information." + }, + "embedding": { + "type": "array", + "items": { + "type": "number" + }, + "description": "Optional embedding for the chunk. If not provided, it will be computed later." } }, "additionalProperties": false, @@ -10112,7 +12513,8 @@ "content", "metadata" ], - "title": "Chunk" + "title": "Chunk", + "description": "A chunk of content that can be inserted into a vector database." } }, "scores": { @@ -10129,6 +12531,147 @@ ], "title": "QueryChunksResponse" }, + "QueryMetricsRequest": { + "type": "object", + "properties": { + "start_time": { + "type": "integer", + "description": "The start time of the metric to query." + }, + "end_time": { + "type": "integer", + "description": "The end time of the metric to query." + }, + "granularity": { + "type": "string", + "description": "The granularity of the metric to query." + }, + "query_type": { + "type": "string", + "enum": [ + "range", + "instant" + ], + "description": "The type of query to perform." 
+ }, + "label_matchers": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "value": { + "type": "string" + }, + "operator": { + "type": "string", + "enum": [ + "=", + "!=", + "=~", + "!~" + ], + "title": "MetricLabelOperator", + "default": "=" + } + }, + "additionalProperties": false, + "required": [ + "name", + "value", + "operator" + ], + "title": "MetricLabelMatcher" + }, + "description": "The label matchers to apply to the metric." + } + }, + "additionalProperties": false, + "required": [ + "start_time", + "query_type" + ], + "title": "QueryMetricsRequest" + }, + "MetricDataPoint": { + "type": "object", + "properties": { + "timestamp": { + "type": "integer" + }, + "value": { + "type": "number" + } + }, + "additionalProperties": false, + "required": [ + "timestamp", + "value" + ], + "title": "MetricDataPoint" + }, + "MetricLabel": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "value": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "name", + "value" + ], + "title": "MetricLabel" + }, + "MetricSeries": { + "type": "object", + "properties": { + "metric": { + "type": "string" + }, + "labels": { + "type": "array", + "items": { + "$ref": "#/components/schemas/MetricLabel" + } + }, + "values": { + "type": "array", + "items": { + "$ref": "#/components/schemas/MetricDataPoint" + } + } + }, + "additionalProperties": false, + "required": [ + "metric", + "labels", + "values" + ], + "title": "MetricSeries" + }, + "QueryMetricsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/MetricSeries" + } + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "QueryMetricsResponse" + }, "QueryCondition": { "type": "object", "properties": { @@ -10186,16 +12729,19 @@ "type": "array", "items": { "$ref": "#/components/schemas/QueryCondition" - } + }, + "description": "The attribute filters to apply to the spans." }, "attributes_to_return": { "type": "array", "items": { "type": "string" - } + }, + "description": "The attributes to return in the spans." }, "max_depth": { - "type": "integer" + "type": "integer", + "description": "The maximum depth of the tree." } }, "additionalProperties": false, @@ -10228,19 +12774,23 @@ "type": "array", "items": { "$ref": "#/components/schemas/QueryCondition" - } + }, + "description": "The attribute filters to apply to the traces." }, "limit": { - "type": "integer" + "type": "integer", + "description": "The limit of traces to return." }, "offset": { - "type": "integer" + "type": "integer", + "description": "The offset of the traces to return." }, "order_by": { "type": "array", "items": { "type": "string" - } + }, + "description": "The order by of the traces to return." } }, "additionalProperties": false, @@ -10266,22 +12816,27 @@ "type": "object", "properties": { "benchmark_id": { - "type": "string" + "type": "string", + "description": "The ID of the benchmark to register." }, "dataset_id": { - "type": "string" + "type": "string", + "description": "The ID of the dataset to use for the benchmark." }, "scoring_functions": { "type": "array", "items": { "type": "string" - } + }, + "description": "The scoring functions to use for the benchmark." }, "provider_benchmark_id": { - "type": "string" + "type": "string", + "description": "The ID of the provider benchmark to use for the benchmark." 
}, "provider_id": { - "type": "string" + "type": "string", + "description": "The ID of the provider to use for the benchmark." }, "metadata": { "type": "object", @@ -10306,7 +12861,8 @@ "type": "object" } ] - } + }, + "description": "The metadata to use for the benchmark." } }, "additionalProperties": false, @@ -10327,7 +12883,7 @@ "eval/question-answer", "eval/messages-answer" ], - "description": "The purpose of the dataset. One of - \"post-training/messages\": The dataset contains a messages column with list of messages for post-training. { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } - \"eval/question-answer\": The dataset contains a question column and an answer column for evaluation. { \"question\": \"What is the capital of France?\", \"answer\": \"Paris\" } - \"eval/messages-answer\": The dataset contains a messages column with list of messages and an answer column for evaluation. { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, my name is John Doe.\"}, {\"role\": \"assistant\", \"content\": \"Hello, John Doe. How can I help you today?\"}, {\"role\": \"user\", \"content\": \"What's my name?\"}, ], \"answer\": \"John Doe\" }" + "description": "The purpose of the dataset. One of: - \"post-training/messages\": The dataset contains a messages column with list of messages for post-training. { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } - \"eval/question-answer\": The dataset contains a question column and an answer column for evaluation. { \"question\": \"What is the capital of France?\", \"answer\": \"Paris\" } - \"eval/messages-answer\": The dataset contains a messages column with list of messages and an answer column for evaluation. { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, my name is John Doe.\"}, {\"role\": \"assistant\", \"content\": \"Hello, John Doe. How can I help you today?\"}, {\"role\": \"user\", \"content\": \"What's my name?\"}, ], \"answer\": \"John Doe\" }" }, "source": { "$ref": "#/components/schemas/DataSource", @@ -10357,7 +12913,7 @@ } ] }, - "description": "The metadata for the dataset. - E.g. {\"description\": \"My dataset\"}" + "description": "The metadata for the dataset. - E.g. {\"description\": \"My dataset\"}." }, "dataset_id": { "type": "string", @@ -10375,13 +12931,16 @@ "type": "object", "properties": { "model_id": { - "type": "string" + "type": "string", + "description": "The identifier of the model to register." }, "provider_model_id": { - "type": "string" + "type": "string", + "description": "The identifier of the model in the provider." }, "provider_id": { - "type": "string" + "type": "string", + "description": "The identifier of the provider." }, "metadata": { "type": "object", @@ -10406,10 +12965,12 @@ "type": "object" } ] - } + }, + "description": "Any additional metadata for this model." }, "model_type": { - "$ref": "#/components/schemas/ModelType" + "$ref": "#/components/schemas/ModelType", + "description": "The type of model to register." } }, "additionalProperties": false, @@ -10422,22 +12983,28 @@ "type": "object", "properties": { "scoring_fn_id": { - "type": "string" + "type": "string", + "description": "The ID of the scoring function to register." }, "description": { - "type": "string" + "type": "string", + "description": "The description of the scoring function." 
}, "return_type": { - "$ref": "#/components/schemas/ParamType" + "$ref": "#/components/schemas/ParamType", + "description": "The return type of the scoring function." }, "provider_scoring_fn_id": { - "type": "string" + "type": "string", + "description": "The ID of the provider scoring function to use for the scoring function." }, "provider_id": { - "type": "string" + "type": "string", + "description": "The ID of the provider to use for the scoring function." }, "params": { - "$ref": "#/components/schemas/ScoringFnParams" + "$ref": "#/components/schemas/ScoringFnParams", + "description": "The parameters for the scoring function for benchmark eval, these can be overridden for app eval." } }, "additionalProperties": false, @@ -10452,13 +13019,16 @@ "type": "object", "properties": { "shield_id": { - "type": "string" + "type": "string", + "description": "The identifier of the shield to register." }, "provider_shield_id": { - "type": "string" + "type": "string", + "description": "The identifier of the shield in the provider." }, "provider_id": { - "type": "string" + "type": "string", + "description": "The identifier of the provider." }, "params": { "type": "object", @@ -10483,7 +13053,8 @@ "type": "object" } ] - } + }, + "description": "The parameters of the shield." } }, "additionalProperties": false, @@ -10496,13 +13067,16 @@ "type": "object", "properties": { "toolgroup_id": { - "type": "string" + "type": "string", + "description": "The ID of the tool group to register." }, "provider_id": { - "type": "string" + "type": "string", + "description": "The ID of the provider to use for the tool group." }, "mcp_endpoint": { - "$ref": "#/components/schemas/URL" + "$ref": "#/components/schemas/URL", + "description": "The MCP endpoint to use for the tool group." }, "args": { "type": "object", @@ -10527,7 +13101,8 @@ "type": "object" } ] - } + }, + "description": "A dictionary of arguments to pass to the tool group." } }, "additionalProperties": false, @@ -10541,19 +13116,24 @@ "type": "object", "properties": { "vector_db_id": { - "type": "string" + "type": "string", + "description": "The identifier of the vector database to register." }, "embedding_model": { - "type": "string" + "type": "string", + "description": "The embedding model to use." }, "embedding_dimension": { - "type": "integer" + "type": "integer", + "description": "The dimension of the embedding model." }, "provider_id": { - "type": "string" + "type": "string", + "description": "The identifier of the provider." }, "provider_vector_db_id": { - "type": "string" + "type": "string", + "description": "The identifier of the vector database in the provider." } }, "additionalProperties": false, @@ -10602,13 +13182,15 @@ "type": "object", "properties": { "shield_id": { - "type": "string" + "type": "string", + "description": "The identifier of the shield to run." }, "messages": { "type": "array", "items": { "$ref": "#/components/schemas/Message" - } + }, + "description": "The messages to run the shield on." }, "params": { "type": "object", @@ -10633,7 +13215,8 @@ "type": "object" } ] - } + }, + "description": "The parameters of the shield." } }, "additionalProperties": false, @@ -10661,19 +13244,23 @@ "type": "array", "items": { "$ref": "#/components/schemas/QueryCondition" - } + }, + "description": "The attribute filters to apply to the spans." }, "attributes_to_save": { "type": "array", "items": { "type": "string" - } + }, + "description": "The attributes to save to the dataset." 
}, "dataset_id": { - "type": "string" + "type": "string", + "description": "The ID of the dataset to save the spans to." }, "max_depth": { - "type": "integer" + "type": "integer", + "description": "The maximum depth of the tree." } }, "additionalProperties": false, @@ -10760,7 +13347,8 @@ "type": "object", "properties": { "dataset_id": { - "type": "string" + "type": "string", + "description": "The ID of the dataset to score." }, "scoring_functions": { "type": "object", @@ -10773,10 +13361,12 @@ "type": "null" } ] - } + }, + "description": "The scoring functions to use for the scoring." }, "save_results_dataset": { - "type": "boolean" + "type": "boolean", + "description": "Whether to save the results to a dataset." } }, "additionalProperties": false, @@ -10896,10 +13486,12 @@ "type": "object", "properties": { "job_uuid": { - "type": "string" + "type": "string", + "description": "The UUID of the job to create." }, "training_config": { - "$ref": "#/components/schemas/TrainingConfig" + "$ref": "#/components/schemas/TrainingConfig", + "description": "The training configuration." }, "hyperparam_search_config": { "type": "object", @@ -10924,7 +13516,8 @@ "type": "object" } ] - } + }, + "description": "The hyperparam search configuration." }, "logger_config": { "type": "object", @@ -10949,16 +13542,20 @@ "type": "object" } ] - } + }, + "description": "The logger configuration." }, "model": { - "type": "string" + "type": "string", + "description": "The model to fine-tune." }, "checkpoint_dir": { - "type": "string" + "type": "string", + "description": "The directory to save checkpoint(s) to." }, "algorithm_config": { - "$ref": "#/components/schemas/AlgorithmConfig" + "$ref": "#/components/schemas/AlgorithmConfig", + "description": "The algorithm configuration." } }, "additionalProperties": false, diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index fa7b130e2..7638c3cbd 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -27,10 +27,12 @@ paths: $ref: '#/components/responses/DefaultError' tags: - DatasetIO - description: '' + description: Append rows to a dataset. parameters: - name: dataset_id in: path + description: >- + The ID of the dataset to append the rows to. required: true schema: type: string @@ -44,7 +46,8 @@ paths: post: responses: '200': - description: OK + description: >- + A BatchChatCompletionResponse with the full completions. content: application/json: schema: @@ -61,7 +64,8 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Inference - description: '' + description: >- + Generate chat completions for a batch of messages using the specified model. parameters: [] requestBody: content: @@ -73,7 +77,8 @@ paths: post: responses: '200': - description: OK + description: >- + A BatchCompletionResponse with the full completions. content: application/json: schema: @@ -90,7 +95,8 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Inference - description: '' + description: >- + Generate completions for a batch of content using the specified model. parameters: [] requestBody: content: @@ -115,7 +121,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - PostTraining (Coming Soon) - description: '' + description: Cancel a training job. parameters: [] requestBody: content: @@ -129,7 +135,7 @@ paths: '200': description: >- If stream=False, returns a ChatCompletionResponse with the full completion. 
- If stream=True, returns an SSE event stream of ChatCompletionResponseStreamChunk + If stream=True, returns an SSE event stream of ChatCompletionResponseStreamChunk. content: application/json: schema: @@ -164,7 +170,7 @@ paths: '200': description: >- If stream=False, returns a CompletionResponse with the full completion. - If stream=True, returns an SSE event stream of CompletionResponseStreamChunk + If stream=True, returns an SSE event stream of CompletionResponseStreamChunk. content: application/json: schema: @@ -197,11 +203,11 @@ paths: get: responses: '200': - description: A ListAgentsResponse. + description: A PaginatedResponse. content: application/json: schema: - $ref: '#/components/schemas/ListAgentsResponse' + $ref: '#/components/schemas/PaginatedResponse' '400': $ref: '#/components/responses/BadRequest400' '429': @@ -215,7 +221,19 @@ paths: tags: - Agents description: List all agents. - parameters: [] + parameters: + - name: start_index + in: query + description: The index to start the pagination from. + required: false + schema: + type: integer + - name: limit + in: query + description: The number of agents to return. + required: false + schema: + type: integer post: responses: '200': @@ -288,7 +306,7 @@ paths: '200': description: >- If stream=False, returns a Turn object. If stream=True, returns an SSE - event stream of AgentTurnResponseStreamChunk + event stream of AgentTurnResponseStreamChunk. content: application/json: schema: @@ -330,11 +348,90 @@ paths: schema: $ref: '#/components/schemas/CreateAgentTurnRequest' required: true + /v1/openai/v1/responses: + get: + responses: + '200': + description: A ListOpenAIResponseObject. + content: + application/json: + schema: + $ref: '#/components/schemas/ListOpenAIResponseObject' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + description: List all OpenAI responses. + parameters: + - name: after + in: query + description: The ID of the last response to return. + required: false + schema: + type: string + - name: limit + in: query + description: The number of responses to return. + required: false + schema: + type: integer + - name: model + in: query + description: The model to filter responses by. + required: false + schema: + type: string + - name: order + in: query + description: >- + The order to sort responses by when sorted by created_at ('asc' or 'desc'). + required: false + schema: + $ref: '#/components/schemas/Order' + post: + responses: + '200': + description: An OpenAIResponseObject. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAIResponseObject' + text/event-stream: + schema: + $ref: '#/components/schemas/OpenAIResponseObjectStream' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + description: Create a new OpenAI response. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/CreateOpenaiResponseRequest' + required: true /v1/files: get: responses: '200': - description: OK + description: A ListBucketResponse. 
content: application/json: schema: @@ -355,13 +452,14 @@ paths: parameters: - name: bucket in: query + description: 'Bucket name (valid chars: a-zA-Z0-9_-).' required: true schema: type: string post: responses: '200': - description: OK + description: A FileUploadResponse. content: application/json: schema: @@ -432,7 +530,8 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Agents - description: Delete an agent by its ID. + description: >- + Delete an agent by its ID and its associated sessions and turns. parameters: - name: agent_id in: path @@ -444,7 +543,7 @@ paths: get: responses: '200': - description: OK + description: A Session. content: application/json: schema: @@ -501,7 +600,8 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Agents - description: Delete an agent session by its ID. + description: >- + Delete an agent session by its ID and its associated turns. parameters: - name: session_id in: path @@ -520,7 +620,7 @@ paths: get: responses: '200': - description: OK + description: A FileResponse. content: application/json: schema: @@ -542,14 +642,14 @@ paths: parameters: - name: bucket in: path - description: 'Bucket name (valid chars: a-zA-Z0-9_-)' + description: 'Bucket name (valid chars: a-zA-Z0-9_-).' required: true schema: type: string - name: key in: path description: >- - Key under which the file is stored (valid chars: a-zA-Z0-9_-/.) + Key under which the file is stored (valid chars: a-zA-Z0-9_-/.). required: true schema: type: string @@ -574,14 +674,14 @@ paths: parameters: - name: bucket in: path - description: 'Bucket name (valid chars: a-zA-Z0-9_-)' + description: 'Bucket name (valid chars: a-zA-Z0-9_-).' required: true schema: type: string - name: key in: path description: >- - Key under which the file is stored (valid chars: a-zA-Z0-9_-/.) + Key under which the file is stored (valid chars: a-zA-Z0-9_-/.). required: true schema: type: string @@ -592,7 +692,7 @@ paths: description: >- An array of embeddings, one for each content. Each embedding is a list of floats. The dimensionality of the embedding is model-specific; you - can check model metadata using /models/{model_id} + can check model metadata using /models/{model_id}. content: application/json: schema: @@ -623,7 +723,7 @@ paths: responses: '200': description: >- - EvaluateResponse object containing generations and scores + EvaluateResponse object containing generations and scores. content: application/json: schema: @@ -749,7 +849,7 @@ paths: get: responses: '200': - description: OK + description: A Benchmark. content: application/json: schema: @@ -766,10 +866,40 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Benchmarks - description: '' + description: Get a benchmark by its ID. parameters: - name: benchmark_id in: path + description: The ID of the benchmark to get. + required: true + schema: + type: string + /v1/openai/v1/chat/completions/{completion_id}: + get: + responses: + '200': + description: An OpenAICompletionWithInputMessages. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAICompletionWithInputMessages' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Inference + description: Describe a chat completion by its ID. + parameters: + - name: completion_id + in: path + description: ID of the chat completion.
required: true schema: type: string @@ -777,7 +907,7 @@ paths: get: responses: '200': - description: OK + description: A Dataset. content: application/json: schema: @@ -794,10 +924,11 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Datasets - description: '' + description: Get a dataset by its ID. parameters: - name: dataset_id in: path + description: The ID of the dataset to get. required: true schema: type: string @@ -817,10 +948,11 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Datasets - description: '' + description: Unregister a dataset by its ID. parameters: - name: dataset_id in: path + description: The ID of the dataset to unregister. required: true schema: type: string @@ -828,7 +960,7 @@ paths: get: responses: '200': - description: OK + description: A Model. content: application/json: schema: @@ -845,10 +977,11 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Models - description: '' + description: Get a model by its identifier. parameters: - name: model_id in: path + description: The identifier of the model to get. required: true schema: type: string @@ -868,10 +1001,42 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Models - description: '' + description: Unregister a model. parameters: - name: model_id in: path + description: >- + The identifier of the model to unregister. + required: true + schema: + type: string + /v1/openai/v1/responses/{response_id}: + get: + responses: + '200': + description: An OpenAIResponseObject. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAIResponseObject' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + description: Retrieve an OpenAI response by its ID. + parameters: + - name: response_id + in: path + description: >- + The ID of the OpenAI response to retrieve. required: true schema: type: string @@ -879,7 +1044,7 @@ paths: get: responses: '200': - description: OK + description: A ScoringFn. content: application/json: schema: @@ -896,10 +1061,11 @@ paths: $ref: '#/components/responses/DefaultError' tags: - ScoringFunctions - description: '' + description: Get a scoring function by its ID. parameters: - name: scoring_fn_id in: path + description: The ID of the scoring function to get. required: true schema: type: string @@ -907,7 +1073,7 @@ paths: get: responses: '200': - description: OK + description: A Shield. content: application/json: schema: @@ -924,10 +1090,11 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Shields - description: '' + description: Get a shield by its identifier. parameters: - name: identifier in: path + description: The identifier of the shield to get. required: true schema: type: string @@ -935,7 +1102,7 @@ paths: get: responses: '200': - description: OK + description: A Span. content: application/json: schema: @@ -952,15 +1119,18 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Telemetry - description: '' + description: Get a span by its ID. parameters: - name: trace_id in: path + description: >- + The ID of the trace to get the span from. required: true schema: type: string - name: span_id in: path + description: The ID of the span to get. required: true schema: type: string @@ -968,7 +1138,7 @@ paths: post: responses: '200': - description: OK + description: A QuerySpanTreeResponse. 
content: application/json: schema: @@ -985,10 +1155,11 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Telemetry - description: '' + description: Get a span tree by its ID. parameters: - name: span_id in: path + description: The ID of the span to get the tree from. required: true schema: type: string @@ -1002,7 +1173,7 @@ paths: get: responses: '200': - description: OK + description: A Tool. content: application/json: schema: @@ -1019,10 +1190,11 @@ paths: $ref: '#/components/responses/DefaultError' tags: - ToolGroups - description: '' + description: Get a tool by its name. parameters: - name: tool_name in: path + description: The name of the tool to get. required: true schema: type: string @@ -1030,7 +1202,7 @@ paths: get: responses: '200': - description: OK + description: A ToolGroup. content: application/json: schema: @@ -1047,10 +1219,11 @@ paths: $ref: '#/components/responses/DefaultError' tags: - ToolGroups - description: '' + description: Get a tool group by its ID. parameters: - name: toolgroup_id in: path + description: The ID of the tool group to get. required: true schema: type: string @@ -1070,10 +1243,11 @@ paths: $ref: '#/components/responses/DefaultError' tags: - ToolGroups - description: Unregister a tool group + description: Unregister a tool group. parameters: - name: toolgroup_id in: path + description: The ID of the tool group to unregister. required: true schema: type: string @@ -1081,7 +1255,7 @@ paths: get: responses: '200': - description: OK + description: A Trace. content: application/json: schema: @@ -1098,10 +1272,11 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Telemetry - description: '' + description: Get a trace by its ID. parameters: - name: trace_id in: path + description: The ID of the trace to get. required: true schema: type: string @@ -1109,7 +1284,7 @@ paths: get: responses: '200': - description: OK + description: A PostTrainingJobArtifactsResponse. content: application/json: schema: @@ -1126,10 +1301,12 @@ paths: $ref: '#/components/responses/DefaultError' tags: - PostTraining (Coming Soon) - description: '' + description: Get the artifacts of a training job. parameters: - name: job_uuid in: query + description: >- + The UUID of the job to get the artifacts of. required: true schema: type: string @@ -1137,7 +1314,7 @@ paths: get: responses: '200': - description: OK + description: A PostTrainingJobStatusResponse. content: application/json: schema: @@ -1154,10 +1331,12 @@ paths: $ref: '#/components/responses/DefaultError' tags: - PostTraining (Coming Soon) - description: '' + description: Get the status of a training job. parameters: - name: job_uuid in: query + description: >- + The UUID of the job to get the status of. required: true schema: type: string @@ -1165,7 +1344,7 @@ paths: get: responses: '200': - description: OK + description: A ListPostTrainingJobsResponse. content: application/json: schema: @@ -1182,13 +1361,13 @@ paths: $ref: '#/components/responses/DefaultError' tags: - PostTraining (Coming Soon) - description: '' + description: Get all training jobs. parameters: [] /v1/files/session:{upload_id}: get: responses: '200': - description: OK + description: A FileUploadResponse. content: application/json: schema: @@ -1206,18 +1385,19 @@ paths: tags: - Files description: >- - Returns information about an existsing upload session + Returns information about an existing upload session. parameters: - name: upload_id in: path - description: ID of the upload session + description: ID of the upload session.
required: true schema: type: string post: responses: '200': - description: OK + description: >- + A FileResponse or None if the upload is not complete. content: application/json: schema: @@ -1242,7 +1422,7 @@ paths: parameters: - name: upload_id in: path - description: ID of the upload session + description: ID of the upload session. required: true schema: type: string @@ -1257,7 +1437,7 @@ paths: get: responses: '200': - description: OK + description: A VectorDB. content: application/json: schema: @@ -1274,10 +1454,12 @@ paths: $ref: '#/components/responses/DefaultError' tags: - VectorDBs - description: '' + description: Get a vector database by its identifier. parameters: - name: vector_db_id in: path + description: >- + The identifier of the vector database to get. required: true schema: type: string @@ -1297,10 +1479,12 @@ paths: $ref: '#/components/responses/DefaultError' tags: - VectorDBs - description: '' + description: Unregister a vector database. parameters: - name: vector_db_id in: path + description: >- + The identifier of the vector database to unregister. required: true schema: type: string @@ -1308,7 +1492,7 @@ paths: get: responses: '200': - description: OK + description: A HealthInfo. content: application/json: schema: @@ -1325,7 +1509,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Inspect - description: '' + description: Get the health of the service. parameters: [] /v1/tool-runtime/rag-tool/insert: post: @@ -1370,7 +1554,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - VectorIO - description: '' + description: Insert chunks into a vector database. parameters: [] requestBody: content: @@ -1382,7 +1566,8 @@ paths: get: responses: '200': - description: OK + description: >- + A ProviderInfo object containing the provider's details. content: application/json: schema: @@ -1399,10 +1584,12 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Providers - description: '' + description: >- + Get detailed information about a specific provider. parameters: - name: provider_id in: path + description: The ID of the provider to inspect. required: true schema: type: string @@ -1410,7 +1597,7 @@ paths: post: responses: '200': - description: OK + description: A ToolInvocationResult. content: application/json: schema: @@ -1427,7 +1614,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - ToolRuntime - description: Run a tool with the given arguments + description: Run a tool with the given arguments. parameters: [] requestBody: content: @@ -1439,7 +1626,7 @@ paths: get: responses: '200': - description: OK + description: A PaginatedResponse. content: application/json: schema: @@ -1468,9 +1655,9 @@ paths: The response includes: - - data: List of items for the current page + - data: List of items for the current page. - - has_more: Whether there are more items available after this set + - has_more: Whether there are more items available after this set. parameters: - name: dataset_id in: path @@ -1496,7 +1683,7 @@ paths: get: responses: '200': - description: The status of the evaluationjob. + description: The status of the evaluation job. content: application/json: schema: @@ -1599,11 +1786,11 @@ paths: get: responses: '200': - description: A ListAgentSessionsResponse. + description: A PaginatedResponse. 
content: application/json: schema: - $ref: '#/components/schemas/ListAgentSessionsResponse' + $ref: '#/components/schemas/PaginatedResponse' '400': $ref: '#/components/responses/BadRequest400' '429': @@ -1625,11 +1812,23 @@ paths: required: true schema: type: string + - name: start_index + in: query + description: The index to start the pagination from. + required: false + schema: + type: integer + - name: limit + in: query + description: The number of sessions to return. + required: false + schema: + type: integer /v1/eval/benchmarks: get: responses: '200': - description: OK + description: A ListBenchmarksResponse. content: application/json: schema: @@ -1646,7 +1845,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Benchmarks - description: '' + description: List all benchmarks. parameters: [] post: responses: @@ -1664,7 +1863,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Benchmarks - description: '' + description: Register a benchmark. parameters: [] requestBody: content: @@ -1672,11 +1871,94 @@ paths: schema: $ref: '#/components/schemas/RegisterBenchmarkRequest' required: true + /v1/openai/v1/chat/completions: + get: + responses: + '200': + description: A ListOpenAIChatCompletionResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/ListOpenAIChatCompletionResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Inference + description: List all chat completions. + parameters: + - name: after + in: query + description: >- + The ID of the last chat completion to return. + required: false + schema: + type: string + - name: limit + in: query + description: >- + The maximum number of chat completions to return. + required: false + schema: + type: integer + - name: model + in: query + description: The model to filter by. + required: false + schema: + type: string + - name: order + in: query + description: >- + The order to sort the chat completions by: "asc" or "desc". Defaults to + "desc". + required: false + schema: + $ref: '#/components/schemas/Order' + post: + responses: + '200': + description: An OpenAIChatCompletion. + content: + application/json: + schema: + oneOf: + - $ref: '#/components/schemas/OpenAIChatCompletion' + - $ref: '#/components/schemas/OpenAIChatCompletionChunk' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Inference + description: >- + Generate an OpenAI-compatible chat completion for the given messages using + the specified model. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/OpenaiChatCompletionRequest' + required: true /v1/datasets: get: responses: '200': - description: OK + description: A ListDatasetsResponse. content: application/json: schema: @@ -1693,12 +1975,12 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Datasets - description: '' + description: List all datasets. parameters: [] post: responses: '200': - description: OK + description: A Dataset. content: application/json: schema: @@ -1727,7 +2009,7 @@ paths: get: responses: '200': - description: OK + description: A ListFileResponse. 
content: application/json: schema: @@ -1748,7 +2030,7 @@ paths: parameters: - name: bucket in: path - description: 'Bucket name (valid chars: a-zA-Z0-9_-)' + description: 'Bucket name (valid chars: a-zA-Z0-9_-).' required: true schema: type: string @@ -1756,7 +2038,7 @@ paths: get: responses: '200': - description: OK + description: A ListModelsResponse. content: application/json: schema: @@ -1773,12 +2055,12 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Models - description: '' + description: List all models. parameters: [] post: responses: '200': - description: OK + description: A Model. content: application/json: schema: @@ -1795,7 +2077,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Models - description: '' + description: Register a model. parameters: [] requestBody: content: @@ -1803,11 +2085,81 @@ paths: schema: $ref: '#/components/schemas/RegisterModelRequest' required: true + /v1/openai/v1/responses/{response_id}/input_items: + get: + responses: + '200': + description: An ListOpenAIResponseInputItem. + content: + application/json: + schema: + $ref: '#/components/schemas/ListOpenAIResponseInputItem' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + description: >- + List input items for a given OpenAI response. + parameters: + - name: response_id + in: path + description: >- + The ID of the response to retrieve input items for. + required: true + schema: + type: string + - name: after + in: query + description: >- + An item ID to list items after, used for pagination. + required: false + schema: + type: string + - name: before + in: query + description: >- + An item ID to list items before, used for pagination. + required: false + schema: + type: string + - name: include + in: query + description: >- + Additional fields to include in the response. + required: false + schema: + type: array + items: + type: string + - name: limit + in: query + description: >- + A limit on the number of objects to be returned. Limit can range between + 1 and 100, and the default is 20. + required: false + schema: + type: integer + - name: order + in: query + description: >- + The order to return the input items in. Default is desc. + required: false + schema: + $ref: '#/components/schemas/Order' /v1/providers: get: responses: '200': - description: OK + description: >- + A ListProvidersResponse containing information about all providers. content: application/json: schema: @@ -1824,13 +2176,13 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Providers - description: '' + description: List all available providers. parameters: [] /v1/inspect/routes: get: responses: '200': - description: OK + description: A ListRoutesResponse. content: application/json: schema: @@ -1847,13 +2199,13 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Inspect - description: '' + description: List all routes. parameters: [] /v1/tool-runtime/list-tools: get: responses: '200': - description: OK + description: A ListToolDefsResponse. content: application/json: schema: @@ -1870,15 +2222,19 @@ paths: $ref: '#/components/responses/DefaultError' tags: - ToolRuntime - description: '' + description: List all tools in the runtime. parameters: - name: tool_group_id in: query + description: >- + The ID of the tool group to list tools for. 
required: false schema: type: string - name: mcp_endpoint in: query + description: >- + The MCP endpoint to use for the tool group. required: false schema: $ref: '#/components/schemas/URL' @@ -1886,7 +2242,7 @@ paths: get: responses: '200': - description: OK + description: A ListScoringFunctionsResponse. content: application/json: schema: @@ -1903,7 +2259,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - ScoringFunctions - description: '' + description: List all scoring functions. parameters: [] post: responses: @@ -1921,7 +2277,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - ScoringFunctions - description: '' + description: Register a scoring function. parameters: [] requestBody: content: @@ -1933,7 +2289,7 @@ paths: get: responses: '200': - description: OK + description: A ListShieldsResponse. content: application/json: schema: @@ -1950,12 +2306,12 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Shields - description: '' + description: List all shields. parameters: [] post: responses: '200': - description: OK + description: A Shield. content: application/json: schema: @@ -1972,7 +2328,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Shields - description: '' + description: Register a shield. parameters: [] requestBody: content: @@ -1984,7 +2340,7 @@ paths: get: responses: '200': - description: OK + description: A ListToolGroupsResponse. content: application/json: schema: @@ -2001,7 +2357,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - ToolGroups - description: List tool groups with optional provider + description: List tool groups with optional provider. parameters: [] post: responses: @@ -2019,7 +2375,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - ToolGroups - description: Register a tool group + description: Register a tool group. parameters: [] requestBody: content: @@ -2031,7 +2387,7 @@ paths: get: responses: '200': - description: OK + description: A ListToolsResponse. content: application/json: schema: @@ -2048,10 +2404,12 @@ paths: $ref: '#/components/responses/DefaultError' tags: - ToolGroups - description: List tools with optional tool group + description: List tools with optional tool group. parameters: - name: toolgroup_id in: query + description: >- + The ID of the tool group to list tools for. required: false schema: type: string @@ -2059,7 +2417,7 @@ paths: get: responses: '200': - description: OK + description: A ListVectorDBsResponse. content: application/json: schema: @@ -2076,12 +2434,12 @@ paths: $ref: '#/components/responses/DefaultError' tags: - VectorDBs - description: '' + description: List all vector databases. parameters: [] post: responses: '200': - description: OK + description: A VectorDB. content: application/json: schema: @@ -2098,7 +2456,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - VectorDBs - description: '' + description: Register a vector database. parameters: [] requestBody: content: @@ -2123,7 +2481,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Telemetry - description: '' + description: Log an event. 
parameters: [] requestBody: content: @@ -2131,42 +2489,11 @@ paths: schema: $ref: '#/components/schemas/LogEventRequest' required: true - /v1/openai/v1/chat/completions: - post: - responses: - '200': - description: OK - content: - application/json: - schema: - $ref: '#/components/schemas/OpenAIChatCompletion' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Inference - description: >- - Generate an OpenAI-compatible chat completion for the given messages using - the specified model. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/OpenaiChatCompletionRequest' - required: true /v1/openai/v1/completions: post: responses: '200': - description: OK + description: An OpenAICompletion. content: application/json: schema: @@ -2193,11 +2520,43 @@ paths: schema: $ref: '#/components/schemas/OpenaiCompletionRequest' required: true + /v1/openai/v1/embeddings: + post: + responses: + '200': + description: >- + An OpenAIEmbeddingsResponse containing the embeddings. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAIEmbeddingsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Inference + description: >- + Generate OpenAI-compatible embeddings for the given input using the specified + model. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/OpenaiEmbeddingsRequest' + required: true /v1/openai/v1/models: get: responses: '200': - description: OK + description: A OpenAIListModelsResponse. content: application/json: schema: @@ -2214,13 +2573,13 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Models - description: '' + description: List models using the OpenAI API. parameters: [] /v1/post-training/preference-optimize: post: responses: '200': - description: OK + description: A PostTrainingJob. content: application/json: schema: @@ -2237,7 +2596,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - PostTraining (Coming Soon) - description: '' + description: Run preference optimization of a model. parameters: [] requestBody: content: @@ -2279,7 +2638,7 @@ paths: post: responses: '200': - description: OK + description: A QueryChunksResponse. content: application/json: schema: @@ -2296,7 +2655,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - VectorIO - description: '' + description: Query chunks from a vector database. parameters: [] requestBody: content: @@ -2304,11 +2663,46 @@ paths: schema: $ref: '#/components/schemas/QueryChunksRequest' required: true + /v1/telemetry/metrics/{metric_name}: + post: + responses: + '200': + description: A QueryMetricsResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/QueryMetricsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Telemetry + description: Query metrics. 
+ parameters: + - name: metric_name + in: path + description: The name of the metric to query. + required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/QueryMetricsRequest' + required: true /v1/telemetry/spans: post: responses: '200': - description: OK + description: A QuerySpansResponse. content: application/json: schema: @@ -2325,7 +2719,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Telemetry - description: '' + description: Query spans. parameters: [] requestBody: content: @@ -2337,7 +2731,7 @@ paths: post: responses: '200': - description: OK + description: A QueryTracesResponse. content: application/json: schema: @@ -2354,7 +2748,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Telemetry - description: '' + description: Query traces. parameters: [] requestBody: content: @@ -2460,7 +2854,7 @@ paths: post: responses: '200': - description: OK + description: A RunShieldResponse. content: application/json: schema: @@ -2477,7 +2871,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Safety - description: '' + description: Run a shield. parameters: [] requestBody: content: @@ -2502,7 +2896,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Telemetry - description: '' + description: Save spans to a dataset. parameters: [] requestBody: content: @@ -2515,7 +2909,7 @@ paths: responses: '200': description: >- - ScoreResponse object containing rows and aggregated results + A ScoreResponse object containing rows and aggregated results. content: application/json: schema: @@ -2544,7 +2938,7 @@ paths: post: responses: '200': - description: OK + description: A ScoreBatchResponse. content: application/json: schema: @@ -2561,7 +2955,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Scoring - description: '' + description: Score a batch of rows. parameters: [] requestBody: content: @@ -2573,7 +2967,7 @@ paths: post: responses: '200': - description: OK + description: A PostTrainingJob. content: application/json: schema: @@ -2590,7 +2984,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - PostTraining (Coming Soon) - description: '' + description: Run supervised fine-tuning of a model. parameters: [] requestBody: content: @@ -2631,7 +3025,7 @@ paths: get: responses: '200': - description: OK + description: A VersionInfo. content: application/json: schema: @@ -2648,7 +3042,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Inspect - description: '' + description: Get the version of the service. parameters: [] jsonSchemaDialect: >- https://json-schema.org/draft/2020-12/schema @@ -2697,6 +3091,7 @@ components: - type: string - type: array - type: object + description: The rows to append to the dataset. additionalProperties: false required: - rows @@ -2745,10 +3140,13 @@ components: properties: type: type: string - const: grammar - default: grammar + enum: + - json_schema + - grammar description: >- Must be "grammar" to identify this format type + const: grammar + default: grammar bnf: type: object additionalProperties: @@ -2830,10 +3228,13 @@ components: properties: type: type: string - const: json_schema - default: json_schema + enum: + - json_schema + - grammar description: >- Must be "json_schema" to identify this format type + const: json_schema + default: json_schema json_schema: type: object additionalProperties: @@ -3195,22 +3596,34 @@ components: properties: model_id: type: string + description: >- + The identifier of the model to use. 
The model must be registered with + Llama Stack and available via the /models endpoint. messages_batch: type: array items: type: array items: $ref: '#/components/schemas/Message' + description: >- + The messages to generate completions for. sampling_params: $ref: '#/components/schemas/SamplingParams' + description: >- + (Optional) Parameters to control the sampling strategy. tools: type: array items: $ref: '#/components/schemas/ToolDefinition' + description: >- + (Optional) List of tool definitions available to the model. tool_config: $ref: '#/components/schemas/ToolConfig' + description: (Optional) Configuration for tool use. response_format: $ref: '#/components/schemas/ResponseFormat' + description: >- + (Optional) Grammar specification for guided (structured) decoding. logprobs: type: object properties: @@ -3220,7 +3633,9 @@ components: description: >- How many tokens (for each position) to return log probabilities for. additionalProperties: false - title: LogProbConfig + description: >- + (Optional) If specified, log probabilities for each token position will + be returned. additionalProperties: false required: - model_id @@ -3293,14 +3708,22 @@ components: properties: model_id: type: string + description: >- + The identifier of the model to use. The model must be registered with + Llama Stack and available via the /models endpoint. content_batch: type: array items: $ref: '#/components/schemas/InterleavedContent' + description: The content to generate completions for. sampling_params: $ref: '#/components/schemas/SamplingParams' + description: >- + (Optional) Parameters to control the sampling strategy. response_format: $ref: '#/components/schemas/ResponseFormat' + description: >- + (Optional) Grammar specification for guided (structured) decoding. logprobs: type: object properties: @@ -3310,7 +3733,9 @@ components: description: >- How many tokens (for each position) to return log probabilities for. additionalProperties: false - title: LogProbConfig + description: >- + (Optional) If specified, log probabilities for each token position will + be returned. additionalProperties: false required: - model_id @@ -3361,6 +3786,7 @@ components: properties: job_uuid: type: string + description: The UUID of the job to cancel. additionalProperties: false required: - job_uuid @@ -3377,17 +3803,17 @@ components: type: array items: $ref: '#/components/schemas/Message' - description: List of messages in the conversation + description: List of messages in the conversation. sampling_params: $ref: '#/components/schemas/SamplingParams' description: >- - Parameters to control the sampling strategy + Parameters to control the sampling strategy. tools: type: array items: $ref: '#/components/schemas/ToolDefinition' description: >- - (Optional) List of tool definitions available to the model + (Optional) List of tool definitions available to the model. tool_choice: type: string enum: @@ -3570,15 +3996,16 @@ components: Llama Stack and available via the /models endpoint. content: $ref: '#/components/schemas/InterleavedContent' - description: The content to generate a completion for + description: >- + The content to generate a completion for. sampling_params: $ref: '#/components/schemas/SamplingParams' description: >- - (Optional) Parameters to control the sampling strategy + (Optional) Parameters to control the sampling strategy. 
response_format: $ref: '#/components/schemas/ResponseFormat' description: >- - (Optional) Grammar specification for guided (structured) decoding + (Optional) Grammar specification for guided (structured) decoding. stream: type: boolean description: >- @@ -3682,18 +4109,29 @@ components: default: 10 model: type: string + description: >- + The model identifier to use for the agent instructions: type: string + description: The system instructions for the agent + name: + type: string + description: >- + Optional name for the agent, used in telemetry and identification enable_session_persistence: type: boolean default: false + description: >- + Optional flag indicating whether session data has to be persisted response_format: $ref: '#/components/schemas/ResponseFormat' + description: Optional response format configuration additionalProperties: false required: - model - instructions title: AgentConfig + description: Configuration for an agent. AgentTool: oneOf: - type: string @@ -3881,6 +4319,13 @@ components: description: The time the step completed. step_type: type: string + enum: + - inference + - tool_execution + - shield_call + - memory_retrieval + title: StepType + description: Type of the step in an agent turn. const: inference default: inference model_response: @@ -3913,6 +4358,13 @@ components: description: The time the step completed. step_type: type: string + enum: + - inference + - tool_execution + - shield_call + - memory_retrieval + title: StepType + description: Type of the step in an agent turn. const: memory_retrieval default: memory_retrieval vector_db_ids: @@ -3974,6 +4426,13 @@ components: description: The time the step completed. step_type: type: string + enum: + - inference + - tool_execution + - shield_call + - memory_retrieval + title: StepType + description: Type of the step in an agent turn. const: shield_call default: shield_call violation: @@ -4005,6 +4464,13 @@ components: description: The time the step completed. step_type: type: string + enum: + - inference + - tool_execution + - shield_call + - memory_retrieval + title: StepType + description: Type of the step in an agent turn. 
const: tool_execution default: tool_execution tool_calls: @@ -4167,6 +4633,14 @@ components: properties: event_type: type: string + enum: + - step_start + - step_complete + - step_progress + - turn_start + - turn_complete + - turn_awaiting_input + title: AgentTurnResponseEventType const: step_complete default: step_complete step_type: @@ -4205,6 +4679,14 @@ components: properties: event_type: type: string + enum: + - step_start + - step_complete + - step_progress + - turn_start + - turn_complete + - turn_awaiting_input + title: AgentTurnResponseEventType const: step_progress default: step_progress step_type: @@ -4232,6 +4714,14 @@ components: properties: event_type: type: string + enum: + - step_start + - step_complete + - step_progress + - turn_start + - turn_complete + - turn_awaiting_input + title: AgentTurnResponseEventType const: step_start default: step_start step_type: @@ -4276,6 +4766,14 @@ components: properties: event_type: type: string + enum: + - step_start + - step_complete + - step_progress + - turn_start + - turn_complete + - turn_awaiting_input + title: AgentTurnResponseEventType const: turn_awaiting_input default: turn_awaiting_input turn: @@ -4291,6 +4789,14 @@ components: properties: event_type: type: string + enum: + - step_start + - step_complete + - step_progress + - turn_start + - turn_complete + - turn_awaiting_input + title: AgentTurnResponseEventType const: turn_complete default: turn_complete turn: @@ -4305,6 +4811,14 @@ components: properties: event_type: type: string + enum: + - step_start + - step_complete + - step_progress + - turn_start + - turn_complete + - turn_awaiting_input + title: AgentTurnResponseEventType const: turn_start default: turn_start turn_id: @@ -4314,23 +4828,586 @@ components: - event_type - turn_id title: AgentTurnResponseTurnStartPayload + OpenAIResponseInput: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + - $ref: '#/components/schemas/OpenAIResponseMessage' + "OpenAIResponseInputFunctionToolCallOutput": + type: object + properties: + call_id: + type: string + output: + type: string + type: + type: string + const: function_call_output + default: function_call_output + id: + type: string + status: + type: string + additionalProperties: false + required: + - call_id + - output + - type + title: >- + OpenAIResponseInputFunctionToolCallOutput + description: >- + This represents the output of a function call that gets passed back to the + model. 
+ OpenAIResponseInputMessageContent: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + discriminator: + propertyName: type + mapping: + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + OpenAIResponseInputMessageContentImage: + type: object + properties: + detail: + oneOf: + - type: string + const: low + - type: string + const: high + - type: string + const: auto + default: auto + type: + type: string + const: input_image + default: input_image + image_url: + type: string + additionalProperties: false + required: + - detail + - type + title: OpenAIResponseInputMessageContentImage + OpenAIResponseInputMessageContentText: + type: object + properties: + text: + type: string + type: + type: string + const: input_text + default: input_text + additionalProperties: false + required: + - text + - type + title: OpenAIResponseInputMessageContentText + OpenAIResponseInputTool: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch' + - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch' + - $ref: '#/components/schemas/OpenAIResponseInputToolFunction' + - $ref: '#/components/schemas/OpenAIResponseInputToolMCP' + discriminator: + propertyName: type + mapping: + web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch' + file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch' + function: '#/components/schemas/OpenAIResponseInputToolFunction' + mcp: '#/components/schemas/OpenAIResponseInputToolMCP' + OpenAIResponseInputToolFileSearch: + type: object + properties: + type: + type: string + const: file_search + default: file_search + vector_store_id: + type: array + items: + type: string + ranking_options: + type: object + properties: + ranker: + type: string + score_threshold: + type: number + default: 0.0 + additionalProperties: false + title: FileSearchRankingOptions + additionalProperties: false + required: + - type + - vector_store_id + title: OpenAIResponseInputToolFileSearch + OpenAIResponseInputToolFunction: + type: object + properties: + type: + type: string + const: function + default: function + name: + type: string + description: + type: string + parameters: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + strict: + type: boolean + additionalProperties: false + required: + - type + - name + title: OpenAIResponseInputToolFunction + OpenAIResponseInputToolMCP: + type: object + properties: + type: + type: string + const: mcp + default: mcp + server_label: + type: string + server_url: + type: string + headers: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + require_approval: + oneOf: + - type: string + const: always + - type: string + const: never + - type: object + properties: + always: + type: array + items: + type: string + never: + type: array + items: + type: string + additionalProperties: false + title: ApprovalFilter + default: never + allowed_tools: + oneOf: + - type: array + items: + type: string + - type: object + properties: + tool_names: + type: array + items: + type: string + additionalProperties: false + title: AllowedToolsFilter + additionalProperties: false + required: + - type + - server_label + - server_url + - 
require_approval + title: OpenAIResponseInputToolMCP + OpenAIResponseInputToolWebSearch: + type: object + properties: + type: + oneOf: + - type: string + const: web_search + - type: string + const: web_search_preview_2025_03_11 + default: web_search + search_context_size: + type: string + default: medium + additionalProperties: false + required: + - type + title: OpenAIResponseInputToolWebSearch + OpenAIResponseMessage: + type: object + properties: + content: + oneOf: + - type: string + - type: array + items: + $ref: '#/components/schemas/OpenAIResponseInputMessageContent' + - type: array + items: + $ref: '#/components/schemas/OpenAIResponseOutputMessageContent' + role: + oneOf: + - type: string + const: system + - type: string + const: developer + - type: string + const: user + - type: string + const: assistant + type: + type: string + const: message + default: message + id: + type: string + status: + type: string + additionalProperties: false + required: + - content + - role + - type + title: OpenAIResponseMessage + description: >- + Corresponds to the various Message types in the Responses API. They are all + under one type because the Responses API gives them all the same "type" value, + and there is no way to tell them apart in certain scenarios. + OpenAIResponseOutputMessageContent: + type: object + properties: + text: + type: string + type: + type: string + const: output_text + default: output_text + additionalProperties: false + required: + - text + - type + title: >- + OpenAIResponseOutputMessageContentOutputText + "OpenAIResponseOutputMessageFunctionToolCall": + type: object + properties: + call_id: + type: string + name: + type: string + arguments: + type: string + type: + type: string + const: function_call + default: function_call + id: + type: string + status: + type: string + additionalProperties: false + required: + - call_id + - name + - arguments + - type + title: >- + OpenAIResponseOutputMessageFunctionToolCall + "OpenAIResponseOutputMessageWebSearchToolCall": + type: object + properties: + id: + type: string + status: + type: string + type: + type: string + const: web_search_call + default: web_search_call + additionalProperties: false + required: + - id + - status + - type + title: >- + OpenAIResponseOutputMessageWebSearchToolCall + CreateOpenaiResponseRequest: + type: object + properties: + input: + oneOf: + - type: string + - type: array + items: + $ref: '#/components/schemas/OpenAIResponseInput' + description: Input message(s) to create the response. + model: + type: string + description: The underlying LLM used for completions. + instructions: + type: string + previous_response_id: + type: string + description: >- + (Optional) if specified, the new response will be a continuation of the + previous response. This can be used to easily fork-off new responses from + existing responses. 
+ store: + type: boolean + stream: + type: boolean + temperature: + type: number + tools: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseInputTool' + additionalProperties: false + required: + - input + - model + title: CreateOpenaiResponseRequest + OpenAIResponseError: + type: object + properties: + code: + type: string + message: + type: string + additionalProperties: false + required: + - code + - message + title: OpenAIResponseError + OpenAIResponseObject: + type: object + properties: + created_at: + type: integer + error: + $ref: '#/components/schemas/OpenAIResponseError' + id: + type: string + model: + type: string + object: + type: string + const: response + default: response + output: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseOutput' + parallel_tool_calls: + type: boolean + default: false + previous_response_id: + type: string + status: + type: string + temperature: + type: number + top_p: + type: number + truncation: + type: string + user: + type: string + additionalProperties: false + required: + - created_at + - id + - model + - object + - output + - parallel_tool_calls + - status + title: OpenAIResponseObject + OpenAIResponseOutput: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + discriminator: + propertyName: type + mapping: + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + OpenAIResponseOutputMessageMCPCall: + type: object + properties: + id: + type: string + type: + type: string + const: mcp_call + default: mcp_call + arguments: + type: string + name: + type: string + server_label: + type: string + error: + type: string + output: + type: string + additionalProperties: false + required: + - id + - type + - arguments + - name + - server_label + title: OpenAIResponseOutputMessageMCPCall + OpenAIResponseOutputMessageMCPListTools: + type: object + properties: + id: + type: string + type: + type: string + const: mcp_list_tools + default: mcp_list_tools + server_label: + type: string + tools: + type: array + items: + type: object + properties: + input_schema: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + name: + type: string + description: + type: string + additionalProperties: false + required: + - input_schema + - name + title: MCPListToolsTool + additionalProperties: false + required: + - id + - type + - server_label + - tools + title: OpenAIResponseOutputMessageMCPListTools + OpenAIResponseObjectStream: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' + discriminator: + propertyName: type + mapping: + response.created: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated' + 
response.output_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta' + response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' + "OpenAIResponseObjectStreamResponseCompleted": + type: object + properties: + response: + $ref: '#/components/schemas/OpenAIResponseObject' + type: + type: string + const: response.completed + default: response.completed + additionalProperties: false + required: + - response + - type + title: >- + OpenAIResponseObjectStreamResponseCompleted + "OpenAIResponseObjectStreamResponseCreated": + type: object + properties: + response: + $ref: '#/components/schemas/OpenAIResponseObject' + type: + type: string + const: response.created + default: response.created + additionalProperties: false + required: + - response + - type + title: >- + OpenAIResponseObjectStreamResponseCreated + "OpenAIResponseObjectStreamResponseOutputTextDelta": + type: object + properties: + content_index: + type: integer + delta: + type: string + item_id: + type: string + output_index: + type: integer + sequence_number: + type: integer + type: + type: string + const: response.output_text.delta + default: response.output_text.delta + additionalProperties: false + required: + - content_index + - delta + - item_id + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseOutputTextDelta CreateUploadSessionRequest: type: object properties: bucket: type: string description: >- - Bucket under which the file is stored (valid chars: a-zA-Z0-9_-) + Bucket under which the file is stored (valid chars: a-zA-Z0-9_-). key: type: string description: >- - Key under which the file is stored (valid chars: a-zA-Z0-9_-/.) + Key under which the file is stored (valid chars: a-zA-Z0-9_-/.). mime_type: type: string - description: MIME type of the file + description: MIME type of the file. size: type: integer - description: File size in bytes + description: File size in bytes. 
additionalProperties: false required: - bucket @@ -4458,7 +5535,7 @@ components: type: object properties: type: - type: string + $ref: '#/components/schemas/ScoringFnParamsType' const: basic default: basic aggregation_functions: @@ -4468,6 +5545,7 @@ components: additionalProperties: false required: - type + - aggregation_functions title: BasicScoringFnParams BenchmarkConfig: type: object @@ -4507,7 +5585,7 @@ components: type: object properties: type: - type: string + $ref: '#/components/schemas/ScoringFnParamsType' const: llm_as_judge default: llm_as_judge judge_model: @@ -4526,6 +5604,8 @@ components: required: - type - judge_model + - judge_score_regexes + - aggregation_functions title: LLMAsJudgeScoringFnParams ModelCandidate: type: object @@ -4556,7 +5636,7 @@ components: type: object properties: type: - type: string + $ref: '#/components/schemas/ScoringFnParamsType' const: regex_parser default: regex_parser parsing_regexes: @@ -4570,6 +5650,8 @@ components: additionalProperties: false required: - type + - parsing_regexes + - aggregation_functions title: RegexParserScoringFnParams ScoringFnParams: oneOf: @@ -4582,6 +5664,13 @@ components: llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' regex_parser: '#/components/schemas/RegexParserScoringFnParams' basic: '#/components/schemas/BasicScoringFnParams' + ScoringFnParamsType: + type: string + enum: + - llm_as_judge + - regex_parser + - basic + title: ScoringFnParamsType EvaluateRowsRequest: type: object properties: @@ -4744,6 +5833,16 @@ components: type: string type: type: string + enum: + - model + - shield + - vector_db + - dataset + - scoring_function + - benchmark + - tool + - tool_group + title: ResourceType const: benchmark default: benchmark dataset_id: @@ -4765,13 +5864,375 @@ components: additionalProperties: false required: - identifier - - provider_resource_id - provider_id - type - dataset_id - scoring_functions - metadata title: Benchmark + OpenAIAssistantMessageParam: + type: object + properties: + role: + type: string + const: assistant + default: assistant + description: >- + Must be "assistant" to identify this as the model's response + content: + oneOf: + - type: string + - type: array + items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartParam' + description: The content of the model's response + name: + type: string + description: >- + (Optional) The name of the assistant message participant. + tool_calls: + type: array + items: + $ref: '#/components/schemas/OpenAIChatCompletionToolCall' + description: >- + List of tool calls. Each tool call is an OpenAIChatCompletionToolCall + object. + additionalProperties: false + required: + - role + title: OpenAIAssistantMessageParam + description: >- + A message containing the model's (assistant) response in an OpenAI-compatible + chat completion request. 
+ "OpenAIChatCompletionContentPartImageParam": + type: object + properties: + type: + type: string + const: image_url + default: image_url + image_url: + $ref: '#/components/schemas/OpenAIImageURL' + additionalProperties: false + required: + - type + - image_url + title: >- + OpenAIChatCompletionContentPartImageParam + OpenAIChatCompletionContentPartParam: + oneOf: + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + discriminator: + propertyName: type + mapping: + text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + OpenAIChatCompletionContentPartTextParam: + type: object + properties: + type: + type: string + const: text + default: text + text: + type: string + additionalProperties: false + required: + - type + - text + title: OpenAIChatCompletionContentPartTextParam + OpenAIChatCompletionToolCall: + type: object + properties: + index: + type: integer + id: + type: string + type: + type: string + const: function + default: function + function: + $ref: '#/components/schemas/OpenAIChatCompletionToolCallFunction' + additionalProperties: false + required: + - type + title: OpenAIChatCompletionToolCall + OpenAIChatCompletionToolCallFunction: + type: object + properties: + name: + type: string + arguments: + type: string + additionalProperties: false + title: OpenAIChatCompletionToolCallFunction + OpenAIChoice: + type: object + properties: + message: + $ref: '#/components/schemas/OpenAIMessageParam' + description: The message from the model + finish_reason: + type: string + description: The reason the model stopped generating + index: + type: integer + description: The index of the choice + logprobs: + $ref: '#/components/schemas/OpenAIChoiceLogprobs' + description: >- + (Optional) The log probabilities for the tokens in the message + additionalProperties: false + required: + - message + - finish_reason + - index + title: OpenAIChoice + description: >- + A choice from an OpenAI-compatible chat completion response. + OpenAIChoiceLogprobs: + type: object + properties: + content: + type: array + items: + $ref: '#/components/schemas/OpenAITokenLogProb' + description: >- + (Optional) The log probabilities for the tokens in the message + refusal: + type: array + items: + $ref: '#/components/schemas/OpenAITokenLogProb' + description: >- + (Optional) The log probabilities for the tokens in the message + additionalProperties: false + title: OpenAIChoiceLogprobs + description: >- + The log probabilities for the tokens in the message from an OpenAI-compatible + chat completion response. + OpenAIDeveloperMessageParam: + type: object + properties: + role: + type: string + const: developer + default: developer + description: >- + Must be "developer" to identify this as a developer message + content: + oneOf: + - type: string + - type: array + items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartParam' + description: The content of the developer message + name: + type: string + description: >- + (Optional) The name of the developer message participant. + additionalProperties: false + required: + - role + - content + title: OpenAIDeveloperMessageParam + description: >- + A message from the developer in an OpenAI-compatible chat completion request. 
+ OpenAIImageURL: + type: object + properties: + url: + type: string + detail: + type: string + additionalProperties: false + required: + - url + title: OpenAIImageURL + OpenAIMessageParam: + oneOf: + - $ref: '#/components/schemas/OpenAIUserMessageParam' + - $ref: '#/components/schemas/OpenAISystemMessageParam' + - $ref: '#/components/schemas/OpenAIAssistantMessageParam' + - $ref: '#/components/schemas/OpenAIToolMessageParam' + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + discriminator: + propertyName: role + mapping: + user: '#/components/schemas/OpenAIUserMessageParam' + system: '#/components/schemas/OpenAISystemMessageParam' + assistant: '#/components/schemas/OpenAIAssistantMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + developer: '#/components/schemas/OpenAIDeveloperMessageParam' + OpenAISystemMessageParam: + type: object + properties: + role: + type: string + const: system + default: system + description: >- + Must be "system" to identify this as a system message + content: + oneOf: + - type: string + - type: array + items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartParam' + description: >- + The content of the "system prompt". If multiple system messages are provided, + they are concatenated. The underlying Llama Stack code may also add other + system messages (for example, for formatting tool definitions). + name: + type: string + description: >- + (Optional) The name of the system message participant. + additionalProperties: false + required: + - role + - content + title: OpenAISystemMessageParam + description: >- + A system message providing instructions or context to the model. + OpenAITokenLogProb: + type: object + properties: + token: + type: string + bytes: + type: array + items: + type: integer + logprob: + type: number + top_logprobs: + type: array + items: + $ref: '#/components/schemas/OpenAITopLogProb' + additionalProperties: false + required: + - token + - logprob + - top_logprobs + title: OpenAITokenLogProb + description: >- + The log probability for a token from an OpenAI-compatible chat completion + response. + OpenAIToolMessageParam: + type: object + properties: + role: + type: string + const: tool + default: tool + description: >- + Must be "tool" to identify this as a tool response + tool_call_id: + type: string + description: >- + Unique identifier for the tool call this response is for + content: + oneOf: + - type: string + - type: array + items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartParam' + description: The response content from the tool + additionalProperties: false + required: + - role + - tool_call_id + - content + title: OpenAIToolMessageParam + description: >- + A message representing the result of a tool invocation in an OpenAI-compatible + chat completion request. + OpenAITopLogProb: + type: object + properties: + token: + type: string + bytes: + type: array + items: + type: integer + logprob: + type: number + additionalProperties: false + required: + - token + - logprob + title: OpenAITopLogProb + description: >- + The top log probability for a token from an OpenAI-compatible chat completion + response. 
+ OpenAIUserMessageParam: + type: object + properties: + role: + type: string + const: user + default: user + description: >- + Must be "user" to identify this as a user message + content: + oneOf: + - type: string + - type: array + items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartParam' + description: >- + The content of the message, which can include text and other media + name: + type: string + description: >- + (Optional) The name of the user message participant. + additionalProperties: false + required: + - role + - content + title: OpenAIUserMessageParam + description: >- + A message from the user in an OpenAI-compatible chat completion request. + OpenAICompletionWithInputMessages: + type: object + properties: + id: + type: string + description: The ID of the chat completion + choices: + type: array + items: + $ref: '#/components/schemas/OpenAIChoice' + description: List of choices + object: + type: string + const: chat.completion + default: chat.completion + description: >- + The object type, which will be "chat.completion" + created: + type: integer + description: >- + The Unix timestamp in seconds when the chat completion was created + model: + type: string + description: >- + The model that was used to generate the chat completion + input_messages: + type: array + items: + $ref: '#/components/schemas/OpenAIMessageParam' + additionalProperties: false + required: + - id + - choices + - object + - created + - model + - input_messages + title: OpenAICompletionWithInputMessages DataSource: oneOf: - $ref: '#/components/schemas/URIDataSource' @@ -4792,6 +6253,16 @@ components: type: string type: type: string + enum: + - model + - shield + - vector_db + - dataset + - scoring_function + - benchmark + - tool + - tool_group + title: ResourceType const: dataset default: dataset purpose: @@ -4818,7 +6289,6 @@ components: additionalProperties: false required: - identifier - - provider_resource_id - provider_id - type - purpose @@ -4917,6 +6387,16 @@ components: type: string type: type: string + enum: + - model + - shield + - vector_db + - dataset + - scoring_function + - benchmark + - tool + - tool_group + title: ResourceType const: model default: model metadata: @@ -4935,7 +6415,6 @@ components: additionalProperties: false required: - identifier - - provider_resource_id - provider_id - type - metadata @@ -5071,6 +6550,16 @@ components: type: string type: type: string + enum: + - model + - shield + - vector_db + - dataset + - scoring_function + - benchmark + - tool + - tool_group + title: ResourceType const: scoring_function default: scoring_function description: @@ -5092,7 +6581,6 @@ components: additionalProperties: false required: - identifier - - provider_resource_id - provider_id - type - metadata @@ -5131,6 +6619,16 @@ components: type: string type: type: string + enum: + - model + - shield + - vector_db + - dataset + - scoring_function + - benchmark + - tool + - tool_group + title: ResourceType const: shield default: shield params: @@ -5146,7 +6644,6 @@ components: additionalProperties: false required: - identifier - - provider_resource_id - provider_id - type title: Shield @@ -5193,8 +6690,10 @@ components: type: array items: type: string + description: The attributes to return in the tree. max_depth: type: integer + description: The maximum depth of the tree. 
additionalProperties: false title: GetSpanTreeRequest SpanStatus: @@ -5261,12 +6760,20 @@ components: type: string type: type: string + enum: + - model + - shield + - vector_db + - dataset + - scoring_function + - benchmark + - tool + - tool_group + title: ResourceType const: tool default: tool toolgroup_id: type: string - tool_host: - $ref: '#/components/schemas/ToolHost' description: type: string parameters: @@ -5286,21 +6793,12 @@ components: additionalProperties: false required: - identifier - - provider_resource_id - provider_id - type - toolgroup_id - - tool_host - description - parameters title: Tool - ToolHost: - type: string - enum: - - distribution - - client - - model_context_protocol - title: ToolHost ToolGroup: type: object properties: @@ -5312,6 +6810,16 @@ components: type: string type: type: string + enum: + - model + - shield + - vector_db + - dataset + - scoring_function + - benchmark + - tool + - tool_group + title: ResourceType const: tool_group default: tool_group mcp_endpoint: @@ -5329,7 +6837,6 @@ components: additionalProperties: false required: - identifier - - provider_resource_id - provider_id - type title: ToolGroup @@ -5443,6 +6950,16 @@ components: type: string type: type: string + enum: + - model + - shield + - vector_db + - dataset + - scoring_function + - benchmark + - tool + - tool_group + title: ResourceType const: vector_db default: vector_db embedding_model: @@ -5452,7 +6969,6 @@ components: additionalProperties: false required: - identifier - - provider_resource_id - provider_id - type - embedding_model @@ -5463,6 +6979,11 @@ components: properties: status: type: string + enum: + - OK + - Error + - Not Implemented + title: HealthStatus additionalProperties: false required: - status @@ -5526,6 +7047,8 @@ components: properties: vector_db_id: type: string + description: >- + The identifier of the vector database to insert the chunks into. chunks: type: array items: @@ -5533,6 +7056,9 @@ components: properties: content: $ref: '#/components/schemas/InterleavedContent' + description: >- + The content of the chunk, which can be interleaved text, images, + or other types. metadata: type: object additionalProperties: @@ -5543,13 +7069,32 @@ components: - type: string - type: array - type: object + description: >- + Metadata associated with the chunk, such as document ID, source, + or other relevant information. + embedding: + type: array + items: + type: number + description: >- + Optional embedding for the chunk. If not provided, it will be computed + later. additionalProperties: false required: - content - metadata title: Chunk + description: >- + A chunk of content that can be inserted into a vector database. + description: >- + The chunks to insert. Each `Chunk` should contain content which can be + interleaved text, images, or other types. `metadata`: `dict[str, Any]` + and `embedding`: `List[float]` are optional. If `metadata` is provided, + you configure how Llama Stack formats the chunk during generation. If + `embedding` is not provided, it will be computed later. ttl_seconds: type: integer + description: The time to live of the chunks. 
additionalProperties: false required: - vector_db_id @@ -5574,18 +7119,30 @@ components: - type: string - type: array - type: object + health: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object additionalProperties: false required: - api - provider_id - provider_type - config + - health title: ProviderInfo InvokeToolRequest: type: object properties: tool_name: type: string + description: The name of the tool to invoke. kwargs: type: object additionalProperties: @@ -5596,6 +7153,8 @@ components: - type: string - type: array - type: object + description: >- + A dictionary of arguments to pass to the tool. additionalProperties: false required: - tool_name @@ -5668,28 +7227,6 @@ components: - job_id - status title: Job - ListAgentSessionsResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/Session' - additionalProperties: false - required: - - data - title: ListAgentSessionsResponse - ListAgentsResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/Agent' - additionalProperties: false - required: - - data - title: ListAgentsResponse BucketResponse: type: object properties: @@ -5724,6 +7261,73 @@ components: required: - data title: ListBenchmarksResponse + Order: + type: string + enum: + - asc + - desc + title: Order + ListOpenAIChatCompletionResponse: + type: object + properties: + data: + type: array + items: + type: object + properties: + id: + type: string + description: The ID of the chat completion + choices: + type: array + items: + $ref: '#/components/schemas/OpenAIChoice' + description: List of choices + object: + type: string + const: chat.completion + default: chat.completion + description: >- + The object type, which will be "chat.completion" + created: + type: integer + description: >- + The Unix timestamp in seconds when the chat completion was created + model: + type: string + description: >- + The model that was used to generate the chat completion + input_messages: + type: array + items: + $ref: '#/components/schemas/OpenAIMessageParam' + additionalProperties: false + required: + - id + - choices + - object + - created + - model + - input_messages + title: OpenAICompletionWithInputMessages + has_more: + type: boolean + first_id: + type: string + last_id: + type: string + object: + type: string + const: list + default: list + additionalProperties: false + required: + - data + - has_more + - first_id + - last_id + - object + title: ListOpenAIChatCompletionResponse ListDatasetsResponse: type: object properties: @@ -5760,6 +7364,96 @@ components: required: - data title: ListModelsResponse + ListOpenAIResponseInputItem: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseInput' + object: + type: string + const: list + default: list + additionalProperties: false + required: + - data + - object + title: ListOpenAIResponseInputItem + ListOpenAIResponseObject: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseObjectWithInput' + has_more: + type: boolean + first_id: + type: string + last_id: + type: string + object: + type: string + const: list + default: list + additionalProperties: false + required: + - data + - has_more + - first_id + - last_id + - object + title: ListOpenAIResponseObject + OpenAIResponseObjectWithInput: + type: object + properties: + created_at: + type: integer + error: + 
$ref: '#/components/schemas/OpenAIResponseError' + id: + type: string + model: + type: string + object: + type: string + const: response + default: response + output: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseOutput' + parallel_tool_calls: + type: boolean + default: false + previous_response_id: + type: string + status: + type: string + temperature: + type: number + top_p: + type: number + truncation: + type: string + user: + type: string + input: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseInput' + additionalProperties: false + required: + - created_at + - id + - model + - object + - output + - parallel_tool_calls + - status + - input + title: OpenAIResponseObjectWithInput ListProvidersResponse: type: object properties: @@ -5876,6 +7570,13 @@ components: unstructured_log: '#/components/schemas/UnstructuredLogEvent' metric: '#/components/schemas/MetricEvent' structured_log: '#/components/schemas/StructuredLogEvent' + EventType: + type: string + enum: + - unstructured_log + - structured_log + - metric + title: EventType LogSeverity: type: string enum: @@ -5906,7 +7607,7 @@ components: - type: boolean - type: 'null' type: - type: string + $ref: '#/components/schemas/EventType' const: metric default: metric metric: @@ -5931,7 +7632,7 @@ components: type: object properties: type: - type: string + $ref: '#/components/schemas/StructuredLogType' const: span_end default: span_end status: @@ -5945,7 +7646,7 @@ components: type: object properties: type: - type: string + $ref: '#/components/schemas/StructuredLogType' const: span_start default: span_start name: @@ -5977,7 +7678,7 @@ components: - type: boolean - type: 'null' type: - type: string + $ref: '#/components/schemas/EventType' const: structured_log default: structured_log payload: @@ -5999,6 +7700,12 @@ components: mapping: span_start: '#/components/schemas/SpanStartPayload' span_end: '#/components/schemas/SpanEndPayload' + StructuredLogType: + type: string + enum: + - span_start + - span_end + title: StructuredLogType UnstructuredLogEvent: type: object properties: @@ -6019,7 +7726,7 @@ components: - type: boolean - type: 'null' type: - type: string + $ref: '#/components/schemas/EventType' const: unstructured_log default: unstructured_log message: @@ -6040,156 +7747,85 @@ components: properties: event: $ref: '#/components/schemas/Event' + description: The event to log. ttl_seconds: type: integer + description: The time to live of the event. additionalProperties: false required: - event - ttl_seconds title: LogEventRequest - OpenAIAssistantMessageParam: + OpenAIJSONSchema: type: object properties: - role: - type: string - const: assistant - default: assistant - description: >- - Must be "assistant" to identify this as the model's response - content: - $ref: '#/components/schemas/InterleavedContent' - description: The content of the model's response name: type: string - description: >- - (Optional) The name of the assistant message participant. - tool_calls: - type: array - items: - $ref: '#/components/schemas/ToolCall' - description: >- - List of tool calls. Each tool call is a ToolCall object. 
+ description: + type: string + strict: + type: boolean + schema: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object additionalProperties: false required: - - role - - content - title: OpenAIAssistantMessageParam - description: >- - A message containing the model's (assistant) response in an OpenAI-compatible - chat completion request. - OpenAIDeveloperMessageParam: + - name + title: OpenAIJSONSchema + OpenAIResponseFormatJSONObject: type: object properties: - role: + type: type: string - const: developer - default: developer - description: >- - Must be "developer" to identify this as a developer message - content: - $ref: '#/components/schemas/InterleavedContent' - description: The content of the developer message - name: - type: string - description: >- - (Optional) The name of the developer message participant. + const: json_object + default: json_object additionalProperties: false required: - - role - - content - title: OpenAIDeveloperMessageParam - description: >- - A message from the developer in an OpenAI-compatible chat completion request. - OpenAIMessageParam: + - type + title: OpenAIResponseFormatJSONObject + OpenAIResponseFormatJSONSchema: + type: object + properties: + type: + type: string + const: json_schema + default: json_schema + json_schema: + $ref: '#/components/schemas/OpenAIJSONSchema' + additionalProperties: false + required: + - type + - json_schema + title: OpenAIResponseFormatJSONSchema + OpenAIResponseFormatParam: oneOf: - - $ref: '#/components/schemas/OpenAIUserMessageParam' - - $ref: '#/components/schemas/OpenAISystemMessageParam' - - $ref: '#/components/schemas/OpenAIAssistantMessageParam' - - $ref: '#/components/schemas/OpenAIToolMessageParam' - - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + - $ref: '#/components/schemas/OpenAIResponseFormatText' + - $ref: '#/components/schemas/OpenAIResponseFormatJSONSchema' + - $ref: '#/components/schemas/OpenAIResponseFormatJSONObject' discriminator: - propertyName: role + propertyName: type mapping: - user: '#/components/schemas/OpenAIUserMessageParam' - system: '#/components/schemas/OpenAISystemMessageParam' - assistant: '#/components/schemas/OpenAIAssistantMessageParam' - tool: '#/components/schemas/OpenAIToolMessageParam' - developer: '#/components/schemas/OpenAIDeveloperMessageParam' - OpenAISystemMessageParam: + text: '#/components/schemas/OpenAIResponseFormatText' + json_schema: '#/components/schemas/OpenAIResponseFormatJSONSchema' + json_object: '#/components/schemas/OpenAIResponseFormatJSONObject' + OpenAIResponseFormatText: type: object properties: - role: + type: type: string - const: system - default: system - description: >- - Must be "system" to identify this as a system message - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - The content of the "system prompt". If multiple system messages are provided, - they are concatenated. The underlying Llama Stack code may also add other - system messages (for example, for formatting tool definitions). - name: - type: string - description: >- - (Optional) The name of the system message participant. + const: text + default: text additionalProperties: false required: - - role - - content - title: OpenAISystemMessageParam - description: >- - A system message providing instructions or context to the model. 
- OpenAIToolMessageParam: - type: object - properties: - role: - type: string - const: tool - default: tool - description: >- - Must be "tool" to identify this as a tool response - tool_call_id: - type: string - description: >- - Unique identifier for the tool call this response is for - content: - $ref: '#/components/schemas/InterleavedContent' - description: The response content from the tool - additionalProperties: false - required: - - role - - tool_call_id - - content - title: OpenAIToolMessageParam - description: >- - A message representing the result of a tool invocation in an OpenAI-compatible - chat completion request. - OpenAIUserMessageParam: - type: object - properties: - role: - type: string - const: user - default: user - description: >- - Must be "user" to identify this as a user message - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - The content of the message, which can include text and other media - name: - type: string - description: >- - (Optional) The name of the user message participant. - additionalProperties: false - required: - - role - - content - title: OpenAIUserMessageParam - description: >- - A message from the user in an OpenAI-compatible chat completion request. + - type + title: OpenAIResponseFormatText OpenaiChatCompletionRequest: type: object properties: @@ -6202,11 +7838,11 @@ components: type: array items: $ref: '#/components/schemas/OpenAIMessageParam' - description: List of messages in the conversation + description: List of messages in the conversation. frequency_penalty: type: number description: >- - (Optional) The penalty for repeated tokens + (Optional) The penalty for repeated tokens. function_call: oneOf: - type: string @@ -6219,7 +7855,7 @@ components: - type: string - type: array - type: object - description: (Optional) The function call to use + description: (Optional) The function call to use. functions: type: array items: @@ -6232,54 +7868,52 @@ components: - type: string - type: array - type: object - description: (Optional) List of functions to use + description: (Optional) List of functions to use. logit_bias: type: object additionalProperties: type: number - description: (Optional) The logit bias to use + description: (Optional) The logit bias to use. logprobs: type: boolean - description: (Optional) The log probabilities to use + description: (Optional) The log probabilities to use. max_completion_tokens: type: integer description: >- - (Optional) The maximum number of tokens to generate + (Optional) The maximum number of tokens to generate. max_tokens: type: integer description: >- - (Optional) The maximum number of tokens to generate + (Optional) The maximum number of tokens to generate. n: type: integer description: >- - (Optional) The number of completions to generate + (Optional) The number of completions to generate. parallel_tool_calls: type: boolean description: >- - (Optional) Whether to parallelize tool calls + (Optional) Whether to parallelize tool calls. presence_penalty: type: number description: >- - (Optional) The penalty for repeated tokens + (Optional) The penalty for repeated tokens. response_format: - type: object - additionalProperties: - type: string - description: (Optional) The response format to use + $ref: '#/components/schemas/OpenAIResponseFormatParam' + description: (Optional) The response format to use. seed: type: integer - description: (Optional) The seed to use + description: (Optional) The seed to use. 
stop: oneOf: - type: string - type: array items: type: string - description: (Optional) The stop tokens to use + description: (Optional) The stop tokens to use. stream: type: boolean description: >- - (Optional) Whether to stream the response + (Optional) Whether to stream the response. stream_options: type: object additionalProperties: @@ -6290,10 +7924,10 @@ components: - type: string - type: array - type: object - description: (Optional) The stream options to use + description: (Optional) The stream options to use. temperature: type: number - description: (Optional) The temperature to use + description: (Optional) The temperature to use. tool_choice: oneOf: - type: string @@ -6306,7 +7940,7 @@ components: - type: string - type: array - type: object - description: (Optional) The tool choice to use + description: (Optional) The tool choice to use. tools: type: array items: @@ -6319,17 +7953,17 @@ components: - type: string - type: array - type: object - description: (Optional) The tools to use + description: (Optional) The tools to use. top_logprobs: type: integer description: >- - (Optional) The top log probabilities to use + (Optional) The top log probabilities to use. top_p: type: number - description: (Optional) The top p to use + description: (Optional) The top p to use. user: type: string - description: (Optional) The user to use + description: (Optional) The user to use. additionalProperties: false required: - model @@ -6370,86 +8004,86 @@ components: title: OpenAIChatCompletion description: >- Response from an OpenAI-compatible chat completion request. - OpenAIChoice: + OpenAIChatCompletionChunk: type: object properties: - message: - $ref: '#/components/schemas/OpenAIMessageParam' - description: The message from the model + id: + type: string + description: The ID of the chat completion + choices: + type: array + items: + $ref: '#/components/schemas/OpenAIChunkChoice' + description: List of choices + object: + type: string + const: chat.completion.chunk + default: chat.completion.chunk + description: >- + The object type, which will be "chat.completion.chunk" + created: + type: integer + description: >- + The Unix timestamp in seconds when the chat completion was created + model: + type: string + description: >- + The model that was used to generate the chat completion + additionalProperties: false + required: + - id + - choices + - object + - created + - model + title: OpenAIChatCompletionChunk + description: >- + Chunk from a streaming response to an OpenAI-compatible chat completion request. + OpenAIChoiceDelta: + type: object + properties: + content: + type: string + description: (Optional) The content of the delta + refusal: + type: string + description: (Optional) The refusal of the delta + role: + type: string + description: (Optional) The role of the delta + tool_calls: + type: array + items: + $ref: '#/components/schemas/OpenAIChatCompletionToolCall' + description: (Optional) The tool calls of the delta + additionalProperties: false + title: OpenAIChoiceDelta + description: >- + A delta from an OpenAI-compatible chat completion streaming response. 
+ OpenAIChunkChoice: + type: object + properties: + delta: + $ref: '#/components/schemas/OpenAIChoiceDelta' + description: The delta from the chunk finish_reason: type: string description: The reason the model stopped generating index: type: integer + description: The index of the choice logprobs: $ref: '#/components/schemas/OpenAIChoiceLogprobs' + description: >- + (Optional) The log probabilities for the tokens in the message additionalProperties: false required: - - message + - delta - finish_reason - index - title: OpenAIChoice + title: OpenAIChunkChoice description: >- - A choice from an OpenAI-compatible chat completion response. - OpenAIChoiceLogprobs: - type: object - properties: - content: - type: array - items: - $ref: '#/components/schemas/OpenAITokenLogProb' - refusal: - type: array - items: - $ref: '#/components/schemas/OpenAITokenLogProb' - additionalProperties: false - title: OpenAIChoiceLogprobs - description: >- - The log probabilities for the tokens in the message from an OpenAI-compatible - chat completion response. - OpenAITokenLogProb: - type: object - properties: - token: - type: string - bytes: - type: array - items: - type: integer - logprob: - type: number - top_logprobs: - type: array - items: - $ref: '#/components/schemas/OpenAITopLogProb' - additionalProperties: false - required: - - token - - logprob - - top_logprobs - title: OpenAITokenLogProb - description: >- - The log probability for a token from an OpenAI-compatible chat completion - response. - OpenAITopLogProb: - type: object - properties: - token: - type: string - bytes: - type: array - items: - type: integer - logprob: - type: number - additionalProperties: false - required: - - token - - logprob - title: OpenAITopLogProb - description: >- - The top log probability for a token from an OpenAI-compatible chat completion - response. + A chunk choice from an OpenAI-compatible chat completion streaming response. OpenaiCompletionRequest: type: object properties: @@ -6472,52 +8106,52 @@ components: type: array items: type: integer - description: The prompt to generate a completion for + description: The prompt to generate a completion for. best_of: type: integer description: >- - (Optional) The number of completions to generate + (Optional) The number of completions to generate. echo: type: boolean - description: (Optional) Whether to echo the prompt + description: (Optional) Whether to echo the prompt. frequency_penalty: type: number description: >- - (Optional) The penalty for repeated tokens + (Optional) The penalty for repeated tokens. logit_bias: type: object additionalProperties: type: number - description: (Optional) The logit bias to use + description: (Optional) The logit bias to use. logprobs: type: boolean - description: (Optional) The log probabilities to use + description: (Optional) The log probabilities to use. max_tokens: type: integer description: >- - (Optional) The maximum number of tokens to generate + (Optional) The maximum number of tokens to generate. n: type: integer description: >- - (Optional) The number of completions to generate + (Optional) The number of completions to generate. presence_penalty: type: number description: >- - (Optional) The penalty for repeated tokens + (Optional) The penalty for repeated tokens. seed: type: integer - description: (Optional) The seed to use + description: (Optional) The seed to use. stop: oneOf: - type: string - type: array items: type: string - description: (Optional) The stop tokens to use + description: (Optional) The stop tokens to use. 
stream: type: boolean description: >- - (Optional) Whether to stream the response + (Optional) Whether to stream the response. stream_options: type: object additionalProperties: @@ -6528,16 +8162,16 @@ components: - type: string - type: array - type: object - description: (Optional) The stream options to use + description: (Optional) The stream options to use. temperature: type: number - description: (Optional) The temperature to use + description: (Optional) The temperature to use. top_p: type: number - description: (Optional) The top p to use + description: (Optional) The top p to use. user: type: string - description: (Optional) The user to use + description: (Optional) The user to use. guided_choice: type: array items: @@ -6595,6 +8229,118 @@ components: title: OpenAICompletionChoice description: >- A choice from an OpenAI-compatible completion response. + OpenaiEmbeddingsRequest: + type: object + properties: + model: + type: string + description: >- + The identifier of the model to use. The model must be an embedding model + registered with Llama Stack and available via the /models endpoint. + input: + oneOf: + - type: string + - type: array + items: + type: string + description: >- + Input text to embed, encoded as a string or array of strings. To embed + multiple inputs in a single request, pass an array of strings. + encoding_format: + type: string + description: >- + (Optional) The format to return the embeddings in. Can be either "float" + or "base64". Defaults to "float". + dimensions: + type: integer + description: >- + (Optional) The number of dimensions the resulting output embeddings should + have. Only supported in text-embedding-3 and later models. + user: + type: string + description: >- + (Optional) A unique identifier representing your end-user, which can help + OpenAI to monitor and detect abuse. + additionalProperties: false + required: + - model + - input + title: OpenaiEmbeddingsRequest + OpenAIEmbeddingData: + type: object + properties: + object: + type: string + const: embedding + default: embedding + description: >- + The object type, which will be "embedding" + embedding: + oneOf: + - type: array + items: + type: number + - type: string + description: >- + The embedding vector as a list of floats (when encoding_format="float") + or as a base64-encoded string (when encoding_format="base64") + index: + type: integer + description: >- + The index of the embedding in the input list + additionalProperties: false + required: + - object + - embedding + - index + title: OpenAIEmbeddingData + description: >- + A single embedding data object from an OpenAI-compatible embeddings response. + OpenAIEmbeddingUsage: + type: object + properties: + prompt_tokens: + type: integer + description: The number of tokens in the input + total_tokens: + type: integer + description: The total number of tokens used + additionalProperties: false + required: + - prompt_tokens + - total_tokens + title: OpenAIEmbeddingUsage + description: >- + Usage information for an OpenAI-compatible embeddings response. 
+ OpenAIEmbeddingsResponse: + type: object + properties: + object: + type: string + const: list + default: list + description: The object type, which will be "list" + data: + type: array + items: + $ref: '#/components/schemas/OpenAIEmbeddingData' + description: List of embedding data objects + model: + type: string + description: >- + The model that was used to generate the embeddings + usage: + $ref: '#/components/schemas/OpenAIEmbeddingUsage' + description: Usage information + additionalProperties: false + required: + - object + - data + - model + - usage + title: OpenAIEmbeddingsResponse + description: >- + Response from an OpenAI-compatible embeddings request. OpenAIModel: type: object properties: @@ -6753,12 +8499,16 @@ components: properties: job_uuid: type: string + description: The UUID of the job to create. finetuned_model: type: string + description: The model to fine-tune. algorithm_config: $ref: '#/components/schemas/DPOAlignmentConfig' + description: The algorithm configuration. training_config: $ref: '#/components/schemas/TrainingConfig' + description: The training configuration. hyperparam_search_config: type: object additionalProperties: @@ -6769,6 +8519,7 @@ components: - type: string - type: array - type: object + description: The hyperparam search configuration. logger_config: type: object additionalProperties: @@ -6779,6 +8530,7 @@ components: - type: string - type: array - type: object + description: The logger configuration. additionalProperties: false required: - job_uuid @@ -6834,18 +8586,41 @@ components: properties: query_generator_config: $ref: '#/components/schemas/RAGQueryGeneratorConfig' + description: Configuration for the query generator. max_tokens_in_context: type: integer default: 4096 + description: Maximum number of tokens in the context. max_chunks: type: integer default: 5 + description: Maximum number of chunks to retrieve. + chunk_template: + type: string + default: > + Result {index} + + Content: {chunk.content} + + Metadata: {metadata} + description: >- + Template for formatting each retrieved chunk in the context. Available + placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk + content string), {metadata} (chunk metadata dict). Default: "Result {index}\nContent: + {chunk.content}\nMetadata: {metadata}\n" + mode: + type: string + description: >- + Search mode for retrieval—either "vector" or "keyword". Default "vector". additionalProperties: false required: - query_generator_config - max_tokens_in_context - max_chunks + - chunk_template title: RAGQueryConfig + description: >- + Configuration for the RAG query generation. RAGQueryGeneratorConfig: oneOf: - $ref: '#/components/schemas/DefaultRAGQueryGeneratorConfig' @@ -6895,8 +8670,11 @@ components: properties: vector_db_id: type: string + description: >- + The identifier of the vector database to query. query: $ref: '#/components/schemas/InterleavedContent' + description: The query to search for. params: type: object additionalProperties: @@ -6907,6 +8685,7 @@ components: - type: string - type: array - type: object + description: The parameters of the query. additionalProperties: false required: - vector_db_id @@ -6922,6 +8701,9 @@ components: properties: content: $ref: '#/components/schemas/InterleavedContent' + description: >- + The content of the chunk, which can be interleaved text, images, + or other types. 
metadata: type: object additionalProperties: @@ -6932,11 +8714,23 @@ components: - type: string - type: array - type: object + description: >- + Metadata associated with the chunk, such as document ID, source, + or other relevant information. + embedding: + type: array + items: + type: number + description: >- + Optional embedding for the chunk. If not provided, it will be computed + later. additionalProperties: false required: - content - metadata title: Chunk + description: >- + A chunk of content that can be inserted into a vector database. scores: type: array items: @@ -6946,6 +8740,109 @@ components: - chunks - scores title: QueryChunksResponse + QueryMetricsRequest: + type: object + properties: + start_time: + type: integer + description: The start time of the metric to query. + end_time: + type: integer + description: The end time of the metric to query. + granularity: + type: string + description: The granularity of the metric to query. + query_type: + type: string + enum: + - range + - instant + description: The type of query to perform. + label_matchers: + type: array + items: + type: object + properties: + name: + type: string + value: + type: string + operator: + type: string + enum: + - '=' + - '!=' + - =~ + - '!~' + title: MetricLabelOperator + default: '=' + additionalProperties: false + required: + - name + - value + - operator + title: MetricLabelMatcher + description: >- + The label matchers to apply to the metric. + additionalProperties: false + required: + - start_time + - query_type + title: QueryMetricsRequest + MetricDataPoint: + type: object + properties: + timestamp: + type: integer + value: + type: number + additionalProperties: false + required: + - timestamp + - value + title: MetricDataPoint + MetricLabel: + type: object + properties: + name: + type: string + value: + type: string + additionalProperties: false + required: + - name + - value + title: MetricLabel + MetricSeries: + type: object + properties: + metric: + type: string + labels: + type: array + items: + $ref: '#/components/schemas/MetricLabel' + values: + type: array + items: + $ref: '#/components/schemas/MetricDataPoint' + additionalProperties: false + required: + - metric + - labels + - values + title: MetricSeries + QueryMetricsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/MetricSeries' + additionalProperties: false + required: + - data + title: QueryMetricsResponse QueryCondition: type: object properties: @@ -6982,12 +8879,16 @@ components: type: array items: $ref: '#/components/schemas/QueryCondition' + description: >- + The attribute filters to apply to the spans. attributes_to_return: type: array items: type: string + description: The attributes to return in the spans. max_depth: type: integer + description: The maximum depth of the tree. additionalProperties: false required: - attribute_filters @@ -7011,14 +8912,19 @@ components: type: array items: $ref: '#/components/schemas/QueryCondition' + description: >- + The attribute filters to apply to the traces. limit: type: integer + description: The limit of traces to return. offset: type: integer + description: The offset of the traces to return. order_by: type: array items: type: string + description: The order by of the traces to return. additionalProperties: false title: QueryTracesRequest QueryTracesResponse: @@ -7037,16 +8943,25 @@ components: properties: benchmark_id: type: string + description: The ID of the benchmark to register. 
dataset_id: type: string + description: >- + The ID of the dataset to use for the benchmark. scoring_functions: type: array items: type: string + description: >- + The scoring functions to use for the benchmark. provider_benchmark_id: type: string + description: >- + The ID of the provider benchmark to use for the benchmark. provider_id: type: string + description: >- + The ID of the provider to use for the benchmark. metadata: type: object additionalProperties: @@ -7057,6 +8972,7 @@ components: - type: string - type: array - type: object + description: The metadata to use for the benchmark. additionalProperties: false required: - benchmark_id @@ -7073,7 +8989,7 @@ components: - eval/question-answer - eval/messages-answer description: >- - The purpose of the dataset. One of - "post-training/messages": The dataset + The purpose of the dataset. One of: - "post-training/messages": The dataset contains a messages column with list of messages for post-training. { "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ] } - "eval/question-answer": The dataset @@ -7106,7 +9022,7 @@ components: - type: array - type: object description: >- - The metadata for the dataset. - E.g. {"description": "My dataset"} + The metadata for the dataset. - E.g. {"description": "My dataset"}. dataset_id: type: string description: >- @@ -7121,10 +9037,14 @@ components: properties: model_id: type: string + description: The identifier of the model to register. provider_model_id: type: string + description: >- + The identifier of the model in the provider. provider_id: type: string + description: The identifier of the provider. metadata: type: object additionalProperties: @@ -7135,8 +9055,10 @@ components: - type: string - type: array - type: object + description: Any additional metadata for this model. model_type: $ref: '#/components/schemas/ModelType' + description: The type of model to register. additionalProperties: false required: - model_id @@ -7146,16 +9068,27 @@ components: properties: scoring_fn_id: type: string + description: >- + The ID of the scoring function to register. description: type: string + description: The description of the scoring function. return_type: $ref: '#/components/schemas/ParamType' + description: The return type of the scoring function. provider_scoring_fn_id: type: string + description: >- + The ID of the provider scoring function to use for the scoring function. provider_id: type: string + description: >- + The ID of the provider to use for the scoring function. params: $ref: '#/components/schemas/ScoringFnParams' + description: >- + The parameters for the scoring function for benchmark eval, these can + be overridden for app eval. additionalProperties: false required: - scoring_fn_id @@ -7167,10 +9100,15 @@ components: properties: shield_id: type: string + description: >- + The identifier of the shield to register. provider_shield_id: type: string + description: >- + The identifier of the shield in the provider. provider_id: type: string + description: The identifier of the provider. params: type: object additionalProperties: @@ -7181,6 +9119,7 @@ components: - type: string - type: array - type: object + description: The parameters of the shield. additionalProperties: false required: - shield_id @@ -7190,10 +9129,15 @@ components: properties: toolgroup_id: type: string + description: The ID of the tool group to register. provider_id: type: string + description: >- + The ID of the provider to use for the tool group. 
mcp_endpoint: $ref: '#/components/schemas/URL' + description: >- + The MCP endpoint to use for the tool group. args: type: object additionalProperties: @@ -7204,6 +9148,8 @@ components: - type: string - type: array - type: object + description: >- + A dictionary of arguments to pass to the tool group. additionalProperties: false required: - toolgroup_id @@ -7214,14 +9160,21 @@ components: properties: vector_db_id: type: string + description: >- + The identifier of the vector database to register. embedding_model: type: string + description: The embedding model to use. embedding_dimension: type: integer + description: The dimension of the embedding model. provider_id: type: string + description: The identifier of the provider. provider_vector_db_id: type: string + description: >- + The identifier of the vector database in the provider. additionalProperties: false required: - vector_db_id @@ -7258,10 +9211,12 @@ components: properties: shield_id: type: string + description: The identifier of the shield to run. messages: type: array items: $ref: '#/components/schemas/Message' + description: The messages to run the shield on. params: type: object additionalProperties: @@ -7272,6 +9227,7 @@ components: - type: string - type: array - type: object + description: The parameters of the shield. additionalProperties: false required: - shield_id @@ -7292,14 +9248,20 @@ components: type: array items: $ref: '#/components/schemas/QueryCondition' + description: >- + The attribute filters to apply to the spans. attributes_to_save: type: array items: type: string + description: The attributes to save to the dataset. dataset_id: type: string + description: >- + The ID of the dataset to save the spans to. max_depth: type: integer + description: The maximum depth of the tree. additionalProperties: false required: - attribute_filters @@ -7354,14 +9316,19 @@ components: properties: dataset_id: type: string + description: The ID of the dataset to score. scoring_functions: type: object additionalProperties: oneOf: - $ref: '#/components/schemas/ScoringFnParams' - type: 'null' + description: >- + The scoring functions to use for the scoring. save_results_dataset: type: boolean + description: >- + Whether to save the results to a dataset. additionalProperties: false required: - dataset_id @@ -7446,8 +9413,10 @@ components: properties: job_uuid: type: string + description: The UUID of the job to create. training_config: $ref: '#/components/schemas/TrainingConfig' + description: The training configuration. hyperparam_search_config: type: object additionalProperties: @@ -7458,6 +9427,7 @@ components: - type: string - type: array - type: object + description: The hyperparam search configuration. logger_config: type: object additionalProperties: @@ -7468,12 +9438,16 @@ components: - type: string - type: array - type: object + description: The logger configuration. model: type: string + description: The model to fine-tune. checkpoint_dir: type: string + description: The directory to save checkpoint(s) to. algorithm_config: $ref: '#/components/schemas/AlgorithmConfig' + description: The algorithm configuration. additionalProperties: false required: - job_uuid diff --git a/docs/getting_started.ipynb b/docs/getting_started.ipynb index b764d4d34..cdaf074b8 100644 --- a/docs/getting_started.ipynb +++ b/docs/getting_started.ipynb @@ -1050,8 +1050,6 @@ "text/html": [ "
ToolGroup(\n",
               "identifier='builtin::code_interpreter',\n",
-              "provider_id='code-interpreter',\n",
-              "provider_resource_id='builtin::code_interpreter',\n",
               "type='tool_group',\n",
               "args=None,\n",
               "mcp_endpoint=None\n",
@@ -1061,7 +1059,6 @@
             "text/plain": [
               "\u001b[1;35mToolGroup\u001b[0m\u001b[1m(\u001b[0m\n",
               "\u001b[2;32m│   \u001b[0m\u001b[33midentifier\u001b[0m=\u001b[32m'builtin::code_interpreter'\u001b[0m,\n",
-              "\u001b[2;32m│   \u001b[0m\u001b[33mprovider_id\u001b[0m=\u001b[32m'code-interpreter'\u001b[0m,\n",
               "\u001b[2;32m│   \u001b[0m\u001b[33mprovider_resource_id\u001b[0m=\u001b[32m'builtin::code_interpreter'\u001b[0m,\n",
               "\u001b[2;32m│   \u001b[0m\u001b[33mtype\u001b[0m=\u001b[32m'tool_group'\u001b[0m,\n",
               "\u001b[2;32m│   \u001b[0m\u001b[33margs\u001b[0m=\u001b[3;35mNone\u001b[0m,\n",
diff --git a/docs/getting_started_llama_api.ipynb b/docs/getting_started_llama_api.ipynb
new file mode 100644
index 000000000..128e9114a
--- /dev/null
+++ b/docs/getting_started_llama_api.ipynb
@@ -0,0 +1,907 @@
+{
+    "cells": [
+      {
+        "cell_type": "markdown",
+        "id": "c1e7571c",
+        "metadata": {
+          "id": "c1e7571c"
+        },
+        "source": [
+          "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb)\n",
+          "\n",
+          "# Getting Started with Llama 4 in Llama Stack\n",
+          "\n",
+          "\"drawing\"\n",
+          "\n",
+          "[Llama Stack](https://github.com/meta-llama/llama-stack) defines and standardizes the set of core building blocks needed to bring generative AI applications to market. These building blocks are presented in the form of interoperable APIs with a broad set of Service Providers providing their implementations.\n",
+          "\n",
+          "Read more about the project here: https://llama-stack.readthedocs.io/en/latest/index.html\n",
+          "\n",
+          "In this guide, we will showcase how you can get started with using Llama 4 in Llama Stack.\n"
+        ]
+      },
+      {
+        "cell_type": "markdown",
+        "id": "4CV1Q19BDMVw",
+        "metadata": {
+          "id": "4CV1Q19BDMVw"
+        },
+        "source": [
+          "## 1. Getting started with Llama Stack"
+        ]
+      },
+      {
+        "cell_type": "markdown",
+        "id": "K4AvfUAJZOeS",
+        "metadata": {
+          "id": "K4AvfUAJZOeS"
+        },
+        "source": [
+          "### 1.1. Create Llama API account\n",
+          "\n",
+          "In this showcase, we will use [Llama API](https://llama.developer.meta.com/) as the inference provider. So, you would first get an API key from Llama API if you don't have one already.\n",
+          "\n",
+          "\n",
+          "\n",
+          "> **Note:**  Set the API Key in the Secrets of this notebook\n",
+          "\n"
+        ]
+      },
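+      {
+        "cell_type": "markdown",
+        "id": "llama-api-key-cell-md",
+        "metadata": {},
+        "source": [
+          "The next cell is a minimal sketch of loading the key into the environment. It assumes the key is stored under the name `LLAMA_API_KEY`, both as the Colab secret name and as the environment variable the `llama_api` template reads; adjust the names if your setup differs.\n"
+        ]
+      },
+      {
+        "cell_type": "code",
+        "execution_count": null,
+        "id": "llama-api-key-cell",
+        "metadata": {},
+        "outputs": [],
+        "source": [
+          "import os\n",
+          "from getpass import getpass\n",
+          "\n",
+          "# Assumption: the llama_api distribution reads the key from the LLAMA_API_KEY env var.\n",
+          "if \"LLAMA_API_KEY\" not in os.environ:\n",
+          "    try:\n",
+          "        # In Colab, read the key from the Secrets panel.\n",
+          "        from google.colab import userdata\n",
+          "        os.environ[\"LLAMA_API_KEY\"] = userdata.get(\"LLAMA_API_KEY\")\n",
+          "    except ImportError:\n",
+          "        # Outside Colab, fall back to an interactive prompt.\n",
+          "        os.environ[\"LLAMA_API_KEY\"] = getpass(\"Enter your Llama API key: \")\n"
+        ]
+      },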
+      {
+        "cell_type": "markdown",
+        "id": "oDUB7M_qe-Gs",
+        "metadata": {
+          "id": "oDUB7M_qe-Gs"
+        },
+        "source": [
+          "### 1.2. Setup and Running a Llama Stack server\n",
+          "\n",
+          "Llama Stack is architected as a collection of APIs that provide developers with the building blocks to build AI applications. \n",
+          "\n",
+          "Llama stack is typically available as a server with an endpoint that you can make calls to. Partners like Together and Fireworks offer their own Llama Stack compatible endpoints.\n",
+          "\n",
+          "In this showcase, we will start a Llama Stack server that is running locally.\n"
+        ]
+      },
+      {
+        "cell_type": "code",
+        "execution_count": null,
+        "id": "J2kGed0R5PSf",
+        "metadata": {
+          "colab": {
+            "base_uri": "https://localhost:8080/"
+          },
+          "collapsed": true,
+          "id": "J2kGed0R5PSf",
+          "outputId": "2478ea60-8d35-48a1-b011-f233831740c5"
+        },
+        "outputs": [
+          {
+            "name": "stdout",
+            "output_type": "stream",
+            "text": [
+              "Requirement already satisfied: uv in /opt/homebrew/Caskroom/miniconda/base/envs/l4/lib/python3.10/site-packages (0.6.12)\n",
+              "\u001b[2mUsing Python 3.10.16 environment at: /opt/homebrew/Caskroom/miniconda/base/envs/l4\u001b[0m\n",
+              "\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 83ms\u001b[0m\u001b[0m\n",
+              "Environment '/Users/erichuang/projects/internal-llama-stack/.venv' already exists, re-using it.\n",
+              "Virtual environment /Users/erichuang/projects/internal-llama-stack/.venv is already active\n",
+              "\u001b[2mUsing Python 3.11.11 environment at: /Users/erichuang/projects/internal-llama-stack/.venv\u001b[0m\n",
+              "\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 387ms\u001b[0m\u001b[0m\n",
+              "Installing pip dependencies\n",
+              "\u001b[2mUsing Python 3.11.11 environment at: /Users/erichuang/projects/internal-llama-stack/.venv\u001b[0m\n",
+              "\u001b[2K\u001b[2mResolved \u001b[1m123 packages\u001b[0m \u001b[2min 1.13s\u001b[0m\u001b[0m                                       \u001b[0m\n",
+              "\u001b[2K\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)                                                   \n",
+              "\u001b[2K\u001b[1A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)-----\u001b[0m\u001b[0m     0 B/9.53 KiB                     \u001b[1A\n",
+              "\u001b[2K\u001b[1A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)-\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB                    \u001b[1A\n",
+              "\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
+              "\u001b[2K\u001b[2A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m     0 B/44.00 KiB                     \u001b[2A\n",
+              "\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
+              "\u001b[2K\u001b[2A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB                   \u001b[2A\n",
+              "\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
+              "\u001b[2mtabulate  \u001b[0m \u001b[32m\u001b[2m------------------------------\u001b[0m\u001b[0m     0 B/34.43 KiB\n",
+              "\u001b[2K\u001b[3A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB                   \u001b[3A\n",
+              "\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
+              "\u001b[2mtabulate  \u001b[0m \u001b[32m-------------\u001b[2m-----------------\u001b[0m\u001b[0m 14.83 KiB/34.43 KiB\n",
+              "\u001b[2K\u001b[3A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB                   \u001b[3A\n",
+              "\u001b[2meval-type-backport\u001b[0m \u001b[32m\u001b[2m------------------------------\u001b[0m\u001b[0m     0 B/5.69 KiB\n",
+              "\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
+              "\u001b[2mtabulate  \u001b[0m \u001b[32m-------------\u001b[2m-----------------\u001b[0m\u001b[0m 14.83 KiB/34.43 KiB\n",
+              "\u001b[2K\u001b[4A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB                   \u001b[4A\n",
+              "\u001b[2meval-type-backport\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 5.69 KiB/5.69 KiB\n",
+              "\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
+              "\u001b[2mtabulate  \u001b[0m \u001b[32m-------------\u001b[2m-----------------\u001b[0m\u001b[0m 14.83 KiB/34.43 KiB\n",
+              "\u001b[2K\u001b[4A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB                   \u001b[4A\n",
+              "\u001b[2meval-type-backport\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 5.69 KiB/5.69 KiB\n",
+              "\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
+              "\u001b[2mtabulate  \u001b[0m \u001b[32m-------------\u001b[2m-----------------\u001b[0m\u001b[0m 14.83 KiB/34.43 KiB\n",
+              "\u001b[2mtyper     \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
+              "\u001b[2K\u001b[5A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m     0 B/85.81 KiB                     \u001b[5A\n",
+              "\u001b[2meval-type-backport\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 5.69 KiB/5.69 KiB\n",
+              "\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
+              "\u001b[2mtabulate  \u001b[0m \u001b[32m-------------\u001b[2m-----------------\u001b[0m\u001b[0m 14.83 KiB/34.43 KiB\n",
+              "\u001b[2mtyper     \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
+              "\u001b[2K\u001b[5A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB                   \u001b[5A\n",
+              "\u001b[2meval-type-backport\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 5.69 KiB/5.69 KiB\n",
+              "\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
+              "\u001b[2mtabulate  \u001b[0m \u001b[32m-------------\u001b[2m-----------------\u001b[0m\u001b[0m 14.83 KiB/34.43 KiB\n",
+              "\u001b[2mtyper     \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
+              "\u001b[2mtogether  \u001b[0m \u001b[32m------\u001b[2m------------------------\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB\n",
+              "\u001b[2K\u001b[6A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m     0 B/3.08 MiB                      \u001b[6A\n",
+              "\u001b[2meval-type-backport\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 5.69 KiB/5.69 KiB\n",
+              "\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
+              "\u001b[2mtabulate  \u001b[0m \u001b[32m-------------\u001b[2m-----------------\u001b[0m\u001b[0m 14.83 KiB/34.43 KiB\n",
+              "\u001b[2mtyper     \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
+              "\u001b[2mtogether  \u001b[0m \u001b[32m------\u001b[2m------------------------\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB\n",
+              "\u001b[2K\u001b[6A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 14.91 KiB/3.08 MiB                    \u001b[6A\n",
+              "\u001b[2meval-type-backport\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 5.69 KiB/5.69 KiB\n",
+              "\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
+              "\u001b[2mtabulate  \u001b[0m \u001b[32m---------------------------\u001b[2m---\u001b[0m\u001b[0m 30.83 KiB/34.43 KiB\n",
+              "\u001b[2mtyper     \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
+              "\u001b[2mtogether  \u001b[0m \u001b[32m------\u001b[2m------------------------\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB\n",
+              "\u001b[2K\u001b[6A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 14.91 KiB/3.08 MiB                    \u001b[6A\n",
+              "\u001b[2meval-type-backport\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 5.69 KiB/5.69 KiB\n",
+              "\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
+              "\u001b[2mtabulate  \u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 34.43 KiB/34.43 KiB\n",
+              "\u001b[2mtyper     \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
+              "\u001b[2mtogether  \u001b[0m \u001b[32m------\u001b[2m------------------------\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB\n",
+              "\u001b[2K\u001b[6A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 14.91 KiB/3.08 MiB                    \u001b[6A\n",
+              "\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
+              "\u001b[2mtabulate  \u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 34.43 KiB/34.43 KiB\n",
+              "\u001b[2mtyper     \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
+              "\u001b[2mtogether  \u001b[0m \u001b[32m------\u001b[2m------------------------\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB\n",
+              "\u001b[2K\u001b[5A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 14.91 KiB/3.08 MiB                    \u001b[5A\n",
+              "\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
+              "\u001b[2mtabulate  \u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 34.43 KiB/34.43 KiB\n",
+              "\u001b[2mtyper     \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
+              "\u001b[2mtogether  \u001b[0m \u001b[32m------\u001b[2m------------------------\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB\n",
+              "\u001b[2K\u001b[5A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 30.91 KiB/3.08 MiB                    \u001b[5A\n",
+              "\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
+              "\u001b[2mtyper     \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
+              "\u001b[2mtogether  \u001b[0m \u001b[32m------\u001b[2m------------------------\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB\n",
+              "\u001b[2K\u001b[4A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 30.91 KiB/3.08 MiB                    \u001b[4A\n",
+              "\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
+              "\u001b[2mtyper     \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
+              "\u001b[2mtogether  \u001b[0m \u001b[32m------\u001b[2m------------------------\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB\n",
+              "\u001b[2K\u001b[4A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 46.91 KiB/3.08 MiB                    \u001b[4A\n",
+              "\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
+              "\u001b[2mtyper     \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
+              "\u001b[2mtogether  \u001b[0m \u001b[32m------\u001b[2m------------------------\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB\n",
+              "\u001b[2K\u001b[4A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 62.91 KiB/3.08 MiB                    \u001b[4A\n",
+              "\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
+              "\u001b[2mtyper     \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
+              "\u001b[2mtogether  \u001b[0m \u001b[32m------\u001b[2m------------------------\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB\n",
+              "\u001b[2K\u001b[4A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 78.91 KiB/3.08 MiB                    \u001b[4A\n",
+              "\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
+              "\u001b[2mtyper     \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
+              "\u001b[2mtogether  \u001b[0m \u001b[32m------\u001b[2m------------------------\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB\n",
+              "\u001b[2K\u001b[4A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 94.91 KiB/3.08 MiB                    \u001b[4A\n",
+              "\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
+              "\u001b[2mtyper     \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
+              "\u001b[2mtogether  \u001b[0m \u001b[32m------------\u001b[2m------------------\u001b[0m\u001b[0m 32.00 KiB/85.81 KiB\n",
+              "\u001b[2K\u001b[4A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 2.62 MiB/3.08 MiB                     \u001b[4A\n",
+              "\u001b[2mtyper     \u001b[0m \u001b[32m----------------------\u001b[2m--------\u001b[0m\u001b[0m 30.88 KiB/44.00 KiB\n",
+              "\u001b[2mtogether  \u001b[0m \u001b[32m------------\u001b[2m------------------\u001b[0m\u001b[0m 32.00 KiB/85.81 KiB\n",
+              "\u001b[2K\u001b[3A\u001b[37m⠹\u001b[0m \u001b[2mPreparing packages...\u001b[0m (3/6)----\u001b[0m\u001b[0m 2.62 MiB/3.08 MiB                     \u001b[3A\n",
+              "\u001b[2mtyper     \u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 44.00 KiB/44.00 KiB\n",
+              "\u001b[2mtogether  \u001b[0m \u001b[32m------------\u001b[2m------------------\u001b[0m\u001b[0m 32.00 KiB/85.81 KiB\n",
+              "\u001b[2K\u001b[3A\u001b[37m⠹\u001b[0m \u001b[2mPreparing packages...\u001b[0m (3/6)----\u001b[0m\u001b[0m 2.62 MiB/3.08 MiB                     \u001b[3A\n",
+              "\u001b[2mtogether  \u001b[0m \u001b[32m------------\u001b[2m------------------\u001b[0m\u001b[0m 32.00 KiB/85.81 KiB\n",
+              "\u001b[2K\u001b[2A\u001b[37m⠹\u001b[0m \u001b[2mPreparing packages...\u001b[0m (3/6)2m--\u001b[0m\u001b[0m 2.80 MiB/3.08 MiB                     \u001b[2A\n",
+              "\u001b[2mtogether  \u001b[0m \u001b[32m-----------------\u001b[2m-------------\u001b[0m\u001b[0m 48.00 KiB/85.81 KiB\n",
+              "\u001b[2K\u001b[2A\u001b[37m⠹\u001b[0m \u001b[2mPreparing packages...\u001b[0m (3/6)2m--\u001b[0m\u001b[0m 2.81 MiB/3.08 MiB                     \u001b[2A\n",
+              "\u001b[2K\u001b[1A\u001b[37m⠹\u001b[0m \u001b[2mPreparing packages...\u001b[0m (3/6)----\u001b[0m\u001b[0m 48.00 KiB/85.81 KiB                   \u001b[1A\n",
+              "\u001b[2K\u001b[1A\u001b[37m⠹\u001b[0m \u001b[2mPreparing packages...\u001b[0m (3/6)2m--\u001b[0m\u001b[0m 80.00 KiB/85.81 KiB                   \u001b[1A\n",
+              "\u001b[2K\u001b[2mPrepared \u001b[1m6 packages\u001b[0m \u001b[2min 365ms\u001b[0m\u001b[0m                                                 \u001b[1A\n",
+              "\u001b[2K\u001b[2mInstalled \u001b[1m6 packages\u001b[0m \u001b[2min 50ms\u001b[0m\u001b[0m                                \u001b[0m\n",
+              " \u001b[32m+\u001b[39m \u001b[1meval-type-backport\u001b[0m\u001b[2m==0.2.2\u001b[0m\n",
+              " \u001b[32m+\u001b[39m \u001b[1mfaiss-cpu\u001b[0m\u001b[2m==1.10.0\u001b[0m\n",
+              " \u001b[32m+\u001b[39m \u001b[1mshellingham\u001b[0m\u001b[2m==1.5.4\u001b[0m\n",
+              " \u001b[32m+\u001b[39m \u001b[1mtabulate\u001b[0m\u001b[2m==0.9.0\u001b[0m\n",
+              " \u001b[32m+\u001b[39m \u001b[1mtogether\u001b[0m\u001b[2m==1.5.5\u001b[0m\n",
+              " \u001b[32m+\u001b[39m \u001b[1mtyper\u001b[0m\u001b[2m==0.15.2\u001b[0m\n",
+              "torch torchvision --index-url https://download.pytorch.org/whl/cpu\n",
+              "\u001b[2mUsing Python 3.11.11 environment at: /Users/erichuang/projects/internal-llama-stack/.venv\u001b[0m\n",
+              "\u001b[2mAudited \u001b[1m2 packages\u001b[0m \u001b[2min 32ms\u001b[0m\u001b[0m\n",
+              "sentence-transformers --no-deps\n",
+              "\u001b[2mUsing Python 3.11.11 environment at: /Users/erichuang/projects/internal-llama-stack/.venv\u001b[0m\n",
+              "\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 63ms\u001b[0m\u001b[0m\n",
+              "\u001b[32mBuild Successful!\u001b[0m\n"
+            ]
+          }
+        ],
+        "source": [
+          "import os \n",
+          "import subprocess\n",
+          "import time\n",
+          "\n",
+          "!pip install uv \n",
+          "!uv pip install requests\n",
+          "\n",
+          "if \"UV_SYSTEM_PYTHON\" in os.environ:\n",
+          "  del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
+          "\n",
+          "# this command installs all the dependencies needed for the llama stack server \n",
+          "!uv run --with llama-stack llama stack build --template llama_api --image-type venv \n",
+          "\n",
+          "def run_llama_stack_server_background():\n",
+          "    log_file = open(\"llama_stack_server.log\", \"w\")\n",
+          "    process = subprocess.Popen(\n",
+          "        \"uv run --with llama-stack llama stack run llama_api --image-type venv\",\n",
+          "        shell=True,\n",
+          "        stdout=log_file,\n",
+          "        stderr=log_file,\n",
+          "        text=True\n",
+          "    )\n",
+          "    \n",
+          "    print(f\"Starting Llama Stack server with PID: {process.pid}\")\n",
+          "    return process\n",
+          "\n",
+          "def wait_for_server_to_start():\n",
+          "    import requests\n",
+          "    from requests.exceptions import ConnectionError\n",
+          "    import time\n",
+          "    \n",
+          "    url = \"http://0.0.0.0:8321/v1/health\"\n",
+          "    max_retries = 30\n",
+          "    retry_interval = 1\n",
+          "    \n",
+          "    print(\"Waiting for server to start\", end=\"\")\n",
+          "    for _ in range(max_retries):\n",
+          "        try:\n",
+          "            response = requests.get(url)\n",
+          "            if response.status_code == 200:\n",
+          "                print(\"\\nServer is ready!\")\n",
+          "                return True\n",
+          "        except ConnectionError:\n",
+          "            print(\".\", end=\"\", flush=True)\n",
+          "            time.sleep(retry_interval)\n",
+          "            \n",
+          "    print(\"\\nServer failed to start after\", max_retries * retry_interval, \"seconds\")\n",
+          "    return False\n",
+          "\n",
+          "\n",
+          "# use this helper if needed to kill the server \n",
+          "def kill_llama_stack_server():\n",
+          "    # Kill any existing llama stack server processes\n",
+          "    os.system(\"ps aux | grep -v grep | grep llama_stack.distribution.server.server | awk '{print $2}' | xargs kill -9\")\n"
+        ]
+      },
+      {
+        "cell_type": "markdown",
+        "id": "c40e9efd",
+        "metadata": {},
+        "source": [
+          "### 1.3 Starting the Llama Stack Server"
+        ]
+      },
+      {
+        "cell_type": "code",
+        "execution_count": null,
+        "id": "f779283d",
+        "metadata": {},
+        "outputs": [],
+        "source": [
+          "server_process = run_llama_stack_server_background()\n",
+          "assert wait_for_server_to_start()"
+        ]
+      },
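+      {
+        "cell_type": "markdown",
+        "id": "server-log-check-md",
+        "metadata": {},
+        "source": [
+          "If the assertion above fails, the server log usually explains why. The sketch below simply tails the `llama_stack_server.log` file that `run_llama_stack_server_background()` writes to.\n"
+        ]
+      },
+      {
+        "cell_type": "code",
+        "execution_count": null,
+        "id": "server-log-check",
+        "metadata": {},
+        "outputs": [],
+        "source": [
+          "from pathlib import Path\n",
+          "\n",
+          "# Print the last few lines of the server log to help diagnose startup issues.\n",
+          "log_path = Path(\"llama_stack_server.log\")\n",
+          "if log_path.exists():\n",
+          "    print(\"\\n\".join(log_path.read_text().splitlines()[-20:]))\n",
+          "else:\n",
+          "    print(\"No server log found yet.\")\n"
+        ]
+      },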
+      {
+        "cell_type": "markdown",
+        "id": "90eb721b",
+        "metadata": {},
+        "source": [
+          "### 1.4 Install and Configure the Client\n",
+          "\n",
+          "Now that we have our Llama Stack server running locally, we need to install the client package to interact with it. The `llama-stack-client` provides a simple Python interface to access all the functionality of Llama Stack, including:\n",
+          "\n",
+          "- Chat Completions ( text and multimodal )\n",
+          "- Safety Shields \n",
+          "- Agent capabilities with tools like web search, RAG with Telemetry\n",
+          "- Evaluation and scoring frameworks\n",
+          "\n",
+          "The client handles all the API communication with our local server, making it easy to integrate Llama Stack's capabilities into your applications.\n",
+          "\n",
+          "In the next cells, we'll:\n",
+          "\n",
+          "1. Install the client package\n",
+          "2. Set up API keys for external services (Together AI and Tavily Search)\n",
+          "3. Initialize the client to connect to our local server\n"
+        ]
+      },
+      {
+        "cell_type": "code",
+        "execution_count": 3,
+        "id": "2e68e32a",
+        "metadata": {},
+        "outputs": [
+          {
+            "name": "stdout",
+            "output_type": "stream",
+            "text": [
+              "\u001b[2mUsing Python 3.10.16 environment at: /opt/homebrew/Caskroom/miniconda/base/envs/stack\u001b[0m\n",
+              "\u001b[2K\u001b[2mResolved \u001b[1m31 packages\u001b[0m \u001b[2min 284ms\u001b[0m\u001b[0m                                        \u001b[0m\n",
+              "\u001b[2mAudited \u001b[1m31 packages\u001b[0m \u001b[2min 0.04ms\u001b[0m\u001b[0m\n"
+            ]
+          }
+        ],
+        "source": [
+          "!pip install -U llama-stack-client"
+        ]
+      },
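+      {
+        "cell_type": "markdown",
+        "id": "client-sanity-check-md",
+        "metadata": {},
+        "source": [
+          "As a quick sanity check, the sketch below connects a client to the local server and lists the models it serves. It assumes the server from section 1.3 is still running on the default port 8321; the exact model identifiers you see depend on the distribution you built.\n"
+        ]
+      },
+      {
+        "cell_type": "code",
+        "execution_count": null,
+        "id": "client-sanity-check",
+        "metadata": {},
+        "outputs": [],
+        "source": [
+          "from llama_stack_client import LlamaStackClient\n",
+          "\n",
+          "# Point the client at the locally running Llama Stack server.\n",
+          "client = LlamaStackClient(base_url=\"http://0.0.0.0:8321\")\n",
+          "\n",
+          "# List the models registered with the stack to confirm the connection works.\n",
+          "for model in client.models.list():\n",
+          "    print(model.identifier)\n"
+        ]
+      },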
+      {
+        "cell_type": "code",
+        "execution_count": 3,
+        "id": "E1UFuJC570Tk",
+        "metadata": {
+          "colab": {
+            "base_uri": "https://localhost:8080/",
+            "height": 1000,
+            "referenced_widgets": [
+              "75307e3dee604d30aa44713e6e293e64",
+              "5ce87402a79342af995df41ac3940d55",
+              "fbbcc19886cc43b38424fbb184162c61",
+              "29212208db6b432eb4f708cd64258954",
+              "50dd8994a4cf486ebbec5ffd4322992a",
+              "f9b768c703494dd198f2978aff4892e8",
+              "1231b9e4cab34c33a38bee63543f1e75",
+              "754deb3970604d48a522bc9f021ad945",
+              "f6ecca7a1a8340fbbe056235a2714fc3",
+              "ef4f63fe9d8f4683a9d20becb6e4e2cb",
+              "7508f10c13634e7aa682cfb29c48d9e7",
+              "26f1430ca7cb4ad5b1b8df1ffdbd32a9",
+              "7cd2d9c9ea7b4d70902ffaff33033078",
+              "101288236cff40b8bb9dbad80dbbc7ee",
+              "d5c9977838a249eeab6ef628279b8155",
+              "d032d1e7b4b54ba28ac83c1a12b23876",
+              "321fce57c158432abeae496ae8a947aa",
+              "3ebe00201bdb4e119e3b74f684a58345",
+              "0f8bab6b8ed04774b386fe952aae66f1",
+              "cfcb6e456c354d99be91f161552f3376",
+              "61bd0d490c0e4c04a331cf9ce6b7d38f",
+              "7d8653fca29f4df3a7487733ff9db60b",
+              "943f8fcb66614353a51f32f8344b6122",
+              "0e695245b97c4bbc85e349fda3dc07b9",
+              "bb0d168c41f540b8ae42239d3938483a",
+              "87700a80125348f28c4f249bdf8b0a8d",
+              "8902c3622da540e496ed5b1524bd01ca",
+              "90432ec1c24b4607a935c94e130cd68d",
+              "464147b149824f20afc727751a702fc7",
+              "67e37a088be64a2ba786ca923b1017dd",
+              "98786f52ef5345b0b9164b9c1f2b8e18",
+              "0e1b9910a77d4b7fa69cb8926e6547d7",
+              "0b276315be4345be83da1e03905c8495",
+              "e11f8c3891284e07bd2572257afd5e1b",
+              "ee18d96394994d01b49d5b03b3d9a019",
+              "844b06df5749441fab6f61656ce581a9",
+              "e1c6b9a20e074f17aeba976b24e80c65",
+              "c690da8daa1e4f9ea73bcacdd92e8a6d",
+              "d0b161ae25c441e8b3caf7a3d88c1b05",
+              "47cf4b6b835d43388576a2abf4cc54f8",
+              "03bbebd659e64b5d9c29a73570c34854",
+              "b68e5097d2504d2cbd7e19aa1aac3a04",
+              "22a665deff88477b9372c0350c4c572b",
+              "5e535ed2b83e496ab57b1c80b615ab0c",
+              "d9de065c7f81443e98ddf066c7b5bd54",
+              "1e836106837c4ac7a11b36e700c46b64",
+              "55591e8179084fcfa3a61c8bd8d09dcb",
+              "de1ef93c41364eda9b4b111231057348",
+              "23b0b2f4f82c4a21846e91d7cea91da5",
+              "9e4d0fbb51284a7487c495c7b95a293d",
+              "b0f8cf1f79e04b5fb47a810f2c81bd7e",
+              "0c359bc4c94c46acbc9094354a15c33d",
+              "59d0b59b6c2248508d0601ff13878d33",
+              "891cb726d45c4fef8f2c74a56df5532b",
+              "fa39189070334939aea5fa4a7de5ec8b",
+              "f0e107dd6d54483aa367da0e337a97cd",
+              "861a00796f55470e85d94733eeee9a5f",
+              "5459633eb6e94ec391d13fcf67425726",
+              "b7b7467ece304ffbbd352b9b96a03aad",
+              "9dece059f1204e29b106fca9e191ddb3",
+              "e2e49c25d6fc4592b317e94cfabc2e5e",
+              "76d37a48a73946bab2821f097cf2605f",
+              "8e81ae00681347cb906b392c3656a64a",
+              "74bedc38b7da4e8a83b0c892d7aa59b5",
+              "d1e67c28b4664e8098dce8f5e80b8779",
+              "abe6cf39b784436993fcbe92221c31a3",
+              "d021a18ab70b4c7e8aec43932a124c36",
+              "72e7c092fb054b7ea0dcd2782b5d8a7d",
+              "8b1ea80221174fae943d5c9f997dfb57",
+              "f8073d625f80415dbf712cee434f6e3a",
+              "5f6014ba13fa4a659b9eb1b5f83599a7",
+              "327ff8f5292d47afbfebd3beea187739",
+              "988cac4341b646079fc73719f3f88ad7",
+              "900a4dac08f540dfb35c29f63236a12c",
+              "1e6009b9b0684b8fbaa379ea96f111ee",
+              "541b9b4e74614e2cb855bb90f03df538",
+              "ff256b2275f740ed82bca4f43b4d6fd2",
+              "3703041a499c426bb427ee008c81cde5",
+              "4b22bbacb995425fb32a2368f3685a92",
+              "49a66eeb9ef74de5ab8904fd90eb7558",
+              "08f9d125018b41c582a0fa1e234315f9",
+              "736c770230644894b85dbc34bd8f1d52",
+              "b67cbbf32f844a19b219be612d5038c9",
+              "774b513d64524ac7823a2cf13efa8d41",
+              "1e56da93bcf64ff490416d2b66cd3dc0",
+              "b7e35038ce344110b785753b655130f5",
+              "5472af91737446f4a4a2d92a3f684a45",
+              "9fb4368802da4a5a8101ba200d98403a",
+              "2e713bcc372e48b2a006558db4d1df68",
+              "1a277abd5ea44253bc6894bef258b52b",
+              "b3eedd82e7da4ce8b3ded70e49a2afd0",
+              "6f5c18cb8002471f8b3764effee37324",
+              "3bebac362b344e8d9103c5011613f1ea",
+              "670905a55b19458da69f83c8bcd511d1",
+              "ff54451a48394faaaa9d8cdb690d0718",
+              "36b5bc19b2d0407f8ab28ff0da2ce12d",
+              "879e48d9a9e04183903d94ffe98313d2",
+              "abce503d70594c2ca9afdc47847c125b",
+              "028e291ee53947bbbbc4bfb68c695f5f",
+              "a530662719374c95a9bef12e59e28c85",
+              "bffc0f4b12f141398535990709fd4f2c",
+              "04804c74e1dd43449d5f758cf5d0ba5e",
+              "95a506c3007c4525b01ee4e1600d671b",
+              "a0d6b0caeb2340fe96c8f5569e3d3ae4",
+              "30798f87a8b848d783fdacd71af5dc04",
+              "07ce54c75e76488ba4019a20b3707061",
+              "f023175de68445f98a6b01bb40ccdc6d",
+              "7389b79a0ff44cd68c7866995d728023",
+              "8e2b70ffe4eb4974bd6393fcc1292267",
+              "13eee164dc534424acb9dc9ee37a9465",
+              "722a7fe16af3422585a20c651345cfa4",
+              "f5596c1c9c4d42f3bc171961f9582eff",
+              "85d66e615b5742e78657b1e60c75fc72",
+              "731c02dc5dd446c3b22765575148e256",
+              "254ce460ce244c99a5afe39d5d51f6b7",
+              "4cf1dc345ace4da59f978f661487f975",
+              "8f30fca71bf24e5ca26e17c2321f893c",
+              "dd85d37dd1d14c7ea4592f8e11b2d2c8",
+              "3cb06377e4454f009d6b2aa7aa6ff0a9",
+              "4502477db4d948e693012364c2dcb370",
+              "52fe404ec9c14db2a7279b4c154eef3d"
+            ]
+          },
+          "collapsed": true,
+          "id": "E1UFuJC570Tk",
+          "outputId": "aebb69d4-c167-4de5-eb8a-dd19dd538f63"
+        },
+        "outputs": [
+          {
+            "name": "stdout",
+            "output_type": "stream",
+            "text": [
+              "Not in Google Colab environment\n"
+            ]
+          }
+        ],
+        "source": [
+          "import os\n",
+          "\n",
+          "try:\n",
+          "    from google.colab import userdata\n",
+          "    os.environ['LLAMA_API_KEY'] = userdata.get('LLAMA_API_KEY')\n",
+          "except ImportError:\n",
+          "    print(\"Not in Google Colab environment\")\n",
+          "\n",
+          "for key in ['LLAMA_API_KEY']:\n",
+          "    try:\n",
+          "        api_key = os.environ[key]\n",
+          "        if not api_key:\n",
+          "            raise ValueError(f\"{key} environment variable is empty\")\n",
+          "    except KeyError:\n",
+          "        api_key = input(f\"{key} environment variable is not set. Please enter your API key: \")\n",
+          "        os.environ[key] = api_key\n",
+          "\n",
+          "from llama_stack_client import LlamaStackClient\n",
+          "\n",
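+          "# Create a client pointed at the locally running Llama Stack server; the\n",
+          "# Llama API key is forwarded via provider_data so the stack's Llama API\n",
+          "# provider can authenticate requests on our behalf.\n",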
+          "client = LlamaStackClient(\n",
+          "    base_url=\"http://0.0.0.0:8321\",\n",
+          "    provider_data={\n",
+          "        \"llama_api_key\": os.environ['LLAMA_API_KEY']\n",
+          "    }\n",
+          ")"
+        ]
+      },
+      {
+        "cell_type": "markdown",
+        "id": "635a7a6f",
+        "metadata": {},
+        "source": [
+          "Now that we have completed the setup and configuration, let's start exploring the capabilities of Llama 4!\n",
+          "\n"
+        ]
+      },
+      {
+        "cell_type": "markdown",
+        "id": "0fc75d73",
+        "metadata": {},
+        "source": [
+          "## 2. Running Llama 4"
+        ]
+      },
+      {
+        "cell_type": "markdown",
+        "id": "7dacaa2d-94e9-42e9-82a0-73522dfc7010",
+        "metadata": {
+          "id": "7dacaa2d-94e9-42e9-82a0-73522dfc7010"
+        },
+        "source": [
+          "### 2.1 Check available models\n",
+          "\n",
+          "All available models are programmatically accessible via the client."
+        ]
+      },
+      {
+        "cell_type": "code",
+        "execution_count": 13,
+        "id": "ruO9jQna_t_S",
+        "metadata": {
+          "colab": {
+            "base_uri": "https://localhost:8080/"
+          },
+          "collapsed": true,
+          "id": "ruO9jQna_t_S",
+          "outputId": "ab1722a7-62ab-43bb-9cab-4e45bf62068a"
+        },
+        "outputs": [
+          {
+            "name": "stdout",
+            "output_type": "stream",
+            "text": [
+              "Available models:\n",
+              "- Llama-3.1-8B-Instruct\n",
+              "- meta-llama/Llama-3.1-8B-Instruct\n",
+              "- Llama-3.2-11B-Vision-Instruct\n",
+              "- meta-llama/Llama-3.2-11B-Vision-Instruct\n",
+              "- Llama-3.3-70B-Instruct\n",
+              "- meta-llama/Llama-3.3-70B-Instruct\n",
+              "- Llama-4-Maverick-17B-128E-Instruct-FP8\n",
+              "- meta-llama/Llama-4-Maverick-17B-128E-Instruct\n",
+              "- all-MiniLM-L6-v2\n"
+            ]
+          }
+        ],
+        "source": [
+          "from rich.pretty import pprint\n",
+          "\n",
+          "print(\"Available models:\")\n",
+          "for m in client.models.list():\n",
+          "    print(f\"- {m.identifier}\")\n"
+        ]
+      },
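+      {
+        "cell_type": "markdown",
+        "id": "5c1a9b3e",
+        "metadata": {},
+        "source": [
+          "Each entry returned by `client.models.list()` carries more than its identifier. As a small sketch (assuming at least one model is registered), we pretty-print the first record with the `pprint` helper imported above to inspect fields such as the provider and model type."
+        ]
+      },
+      {
+        "cell_type": "code",
+        "execution_count": null,
+        "id": "5c1a9b3f",
+        "metadata": {},
+        "outputs": [],
+        "source": [
+          "# Pretty-print the first registered model record in full.\n",
+          "# Assumes at least one model is registered with the stack (see the listing above).\n",
+          "models = client.models.list()\n",
+          "pprint(models[0])"
+        ]
+      },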
+      {
+        "cell_type": "markdown",
+        "id": "86366383",
+        "metadata": {
+          "id": "86366383"
+        },
+        "source": [
+          "### 2.2 Run a simple chat completion with one of the models\n",
+          "\n",
+          "We will test the client by running a simple chat completion."
+        ]
+      },
+      {
+        "cell_type": "code",
+        "execution_count": 14,
+        "id": "77c29dba",
+        "metadata": {
+          "colab": {
+            "base_uri": "https://localhost:8080/"
+          },
+          "id": "77c29dba",
+          "outputId": "4857974f-4c70-4bc4-f90a-6ae49dc9c41e"
+        },
+        "outputs": [
+          {
+            "name": "stdout",
+            "output_type": "stream",
+            "text": [
+              "Here is a two-sentence poem about a llama:\n",
+              "\n",
+              "With soft fur and gentle eyes, the llama roams with gentle surprise, a peaceful presence in the Andean skies. Its calm demeanor and soft humming song bring serenity to all who belong.\n"
+            ]
+          }
+        ],
+        "source": [
+          "# TODO: update this with a vision model\n",
+          "model_id = \"meta-llama/Llama-4-Maverick-17B-128E-Instruct\"\n",
+          "\n",
+          "response = client.inference.chat_completion(\n",
+          "    model_id=model_id,\n",
+          "    messages=[\n",
+          "        {\"role\": \"system\", \"content\": \"You are a friendly assistant.\"},\n",
+          "        {\"role\": \"user\", \"content\": \"Write a two-sentence poem about a llama.\"},\n",
+          "    ],\n",
+          ")\n",
+          "\n",
+          "print(response.completion_message.content)\n"
+        ]
+      },
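+      {
+        "cell_type": "markdown",
+        "id": "8d3f6a21",
+        "metadata": {},
+        "source": [
+          "The inference API can also stream tokens as they are generated. The cell below is a minimal streaming sketch; it assumes the installed client provides the `EventLogger` helper (under `llama_stack_client.lib.inference.event_logger`) for printing streamed chunks."
+        ]
+      },
+      {
+        "cell_type": "code",
+        "execution_count": null,
+        "id": "8d3f6a22",
+        "metadata": {},
+        "outputs": [],
+        "source": [
+          "# Streaming sketch: request a streamed chat completion and print chunks as they\n",
+          "# arrive. Assumes EventLogger is available in the installed llama-stack-client.\n",
+          "from llama_stack_client.lib.inference.event_logger import EventLogger\n",
+          "\n",
+          "stream = client.inference.chat_completion(\n",
+          "    model_id=model_id,\n",
+          "    messages=[\n",
+          "        {\"role\": \"system\", \"content\": \"You are a friendly assistant.\"},\n",
+          "        {\"role\": \"user\", \"content\": \"Write a haiku about llamas.\"},\n",
+          "    ],\n",
+          "    stream=True,\n",
+          ")\n",
+          "\n",
+          "for log in EventLogger().log(stream):\n",
+          "    log.print()"
+        ]
+      },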
+      {
+        "cell_type": "markdown",
+        "id": "7737cd41",
+        "metadata": {},
+        "source": [
+          "### 2.3 Running multimodal inference"
+        ]
+      },
+      {
+        "cell_type": "code",
+        "execution_count": 15,
+        "id": "e7b1baa7",
+        "metadata": {},
+        "outputs": [
+          {
+            "name": "stdout",
+            "output_type": "stream",
+            "text": [
+              "  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current\n",
+              "                                 Dload  Upload   Total   Spent    Left  Speed\n",
+              "100  275k  100  275k    0     0   847k      0 --:--:-- --:--:-- --:--:--  845k--:--:-- --:--:--     0\n"
+            ]
+          },
+          {
+            "data": {
+              "image/jpeg": "/9j/4AAQSkZJRgABAQAAAQABAAD/4QmWaHR0cDovL25zLmFkb2JlLmNvbS94YXAvMS4wLwA8P3hwYWNrZXQgYmVnaW49Iu+7vyIgaWQ9Ilc1TTBNcENlaGlIenJlU3pOVGN6a2M5ZCI/PiA8eDp4bXBtZXRhIHhtbG5zOng9ImFkb2JlOm5zOm1ldGEvIiB4OnhtcHRrPSJYTVAgQ29yZSA0LjQuMC1FeGl2MiI+IDxyZGY6UkRGIHhtbG5zOnJkZj0iaHR0cDovL3d3dy53My5vcmcvMTk5OS8wMi8yMi1yZGYtc3ludGF4LW5zIyI+IDxyZGY6RGVzY3JpcHRpb24gcmRmOmFib3V0PSIiIHhtbG5zOmlwdGNFeHQ9Imh0dHA6Ly9pcHRjLm9yZy9zdGQvSXB0YzR4bXBFeHQvMjAwOC0wMi0yOS8iIGlwdGNFeHQ6RGlnaXRhbFNvdXJjZVR5cGU9InRyYWluZWRBbGdvcml0aG1pY01lZGlhIi8+IDwvcmRmOlJERj4gPC94OnhtcG1ldGE+ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgPD94cGFja2V0IGVuZD0idyI/Pv/bAEMAAgEBAQEBAgEBAQICAgICBAMCAgICBQQEAwQGBQYGBgUGBgYHCQgGBwkHBgYICwgJCgoKCgoGCAsMCwoMCQoKCv/bAEMBAgICAgICBQMDBQoHBgcKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCv/AABEIAwADAAMBEQACEQEDEQH/xAAfAAABBQEBA
QEBAQAAAAAAAAAAAQIDBAUGBwgJCgv/xAC1EAACAQMDAgQDBQUEBAAAAX0BAgMABBEFEiExQQYTUWEHInEUMoGRoQgjQrHBFVLR8CQzYnKCCQoWFxgZGiUmJygpKjQ1Njc4OTpDREVGR0hJSlNUVVZXWFlaY2RlZmdoaWpzdHV2d3h5eoOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4eLj5OXm5+jp6vHy8/T19vf4+fr/xAAfAQADAQEBAQEBAQEBAAAAAAAAAQIDBAUGBwgJCgv/xAC1EQACAQIEBAMEBwUEBAABAncAAQIDEQQFITEGEkFRB2FxEyIygQgUQpGhscEJIzNS8BVictEKFiQ04SXxFxgZGiYnKCkqNTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqCg4SFhoeIiYqSk5SVlpeYmZqio6Slpqeoqaqys7S1tre4ubrCw8TFxsfIycrS09TV1tfY2dri4+Tl5ufo6ery8/T19vf4+fr/2gAMAwEAAhEDEQA/APxxgtYgAAtfLxrVGkfVe3qvqXILSMDOwUSqzLVWrbcmht4mfG0GpdSfcqNao+pI9tEvzKgNT7SfcbrVF1LumwROmcVnOpPuaQrVWtyxBbRiXIXP4VDqTLjWq33J/IjLY2A1Dqz7l+2q33B4o1b7n5U/aTtuL29VdS1p1sj5+X8aznUmVCvVfUstCgOAtR7SZft6vcIIo/MOVoc5gq9W+5dsYkL52/jUSnM1hXqX3LEsCk8rwKlVJ9zSVap3IvsqHkoB+FN1J9yPa1X1ITaIWYADkelTOpNDVaqnueEfF21ji8WMNoxu5r67KKtWVA+PzXEVXidzuvhbDaSWUQSLoBXn5jRn7S8z38BWq+xVmemxQqsK4TtxXiuTTsj0/bVUtxfIUuAV7/lSc523E61W+5JqUCC2UbeamE5t2Q6leqorUrw26sgG0UnUnfcI1qltxViUttA/Gp9pMr21RdQuLZCu4qM+lONSb0uEqtVK9ySSyF3YFQoOBR7WaluQ61Vx0ZV0uAwxmIjGDitJTk9TOlXqrqXLS1BnL7azlUkkbwr1b7kd2P3u0j2ojOdgliKqluP8hPLBIGcVHtJX3NPbVLbiGJScBRSdSY/b1e5JHbocfL1qXUn3KVap3LFvbp5g+XuKl1Jle3qrqbSxqZF46ADpXRCU3RbM5Yir7TcsxwJn7o/KuSVSfc3Ver3J0iUjoKh1J9y1XqdxkkKZ4Wlzy7h7ep3IzBGP4R+VHPIPb1O5FPGozhaanJ9ROvUXUjiRTxsGPpTc5i9vV7kbIok6VSnK24e3q33C7CCPGB04pKpLuKVerbcjto1I3Y+tDqTYo16vckeJSfujFLnnuV7er3GiJCQABT55tbi9vU7kkkKmLIWpU5jdepbcgghViRj9K055mca9V9R/2RNhJWiNSV9wdeq+pRitF+0k46H0rWVSXLuYxrVFPctXMaBMFR0rLnkdEq9VdSBYEbkDjvxR7SXcSrVO49IE6EfjUOpJ63LVep3GvHHu+7UupJLcft6j6ixQpnO2p9pN9S1WqdyRoF24I61KnO+5brVO5DHBH5vC/pWvtJ2Od1avNudJ4ShjE2Qo69axlUnfc0hXqqVrieMbaNroEr39K0p1J2M69eqpWuUtVt4z4clXA+4ePwqHVmp3G69WNHRnyv4ttIl8cXCmMf6yvuMHXqPBp3PicTiKrxb1Om0K2jUIdnp2rmqSqT6nrYWtPld2d34fgjMakJXj1p1E9zup1aqe5uRwx/3RXO6k+50+2qW3LlpbxkjC9azlUn3LjWqdzQggjBB2/Soc5s0daqupfECeVnaAPWp55sp1a1hIbeMoTihzmnuJVqvcqLErzMAPxxVc8jNV6re5FJaoJOB071ftJ23EqtW+40W0ZVuB0qXOdx+1q66mfYWMP28sE7+lbe1nynJCtV9puab2y78bahznbc6nWq9wmt0EX3e1R7SfcbrVe5FYWyNNkKOtN1JdxQrVb7jdThTzApWmpza0FVr1U7XIbuGMWnKinGc7ilWqqF7mPbxIZSNvfmtXKZhCvVfUvQ2yEcLn3rNzmjZVqvchliQvwtNVJkurV7kZt0xkLVe0mL2lXuV5YRu+5Ve0n3E6lW9rkUkSjkpRzzZLqVV1IZY1IO0Cr5pcl2Eas7XbPof/AIJ8+HEW/wDEnidlwdsFpG//AH07fzFf0F4I4BfV8VipbNqP4H8O/SrzqpXzjBYFPSEHJ/N2R+gXwH0yL/hWOvXEvzFlAXNfuc604VoRi9Ln8aYyk69KvVf2FG33nyr8f9EimvrtWT+Jq4s1qSnFn6LwljasaUHc+Iv2gPA8VxHdKEOSpIxX5LncZ6rof09wjnFWEoO5yXg7UDrXhW1vJzmSJTDOWP8AEhx/LBr8AzOjLCZlUg9r3Xof1dk2Z18Zl0W5Xa0LEsCE9B7VlGcrHoOtV7jWtYzHnaKaqTF7WrbcpNbR+ZwBxWvPUsZqtWvucn8UrdBZqdo+telldaftLXPJzbEVVHc4W2to/MXC817rrTfU8mlWnzJtnd+FoUa2A29Bya8bEuo5Xue/Rq1GrxehrG3jJwFFcLqzXU19vV7lS5tkEhG38K2hVmzGVWt3IpbVBHnaPzrVOo+o1Uq23KciR9NnzfwkVTpubvIMRUnGGhv2i7wDntXO6dOGjNXSpqTVy/Ase3aWrnnZbEaJkkATfjcMH0qXsEVdk1yVRMhhShe5pKKvZFrRdpTDnAPvWddJbMulGFi0NqTHa3TvWW6HsyZAhwxYVN7HRCEZLzI7qQKSY8Y+tXBJoUqT6l7RzmLJYdOazqxSejKpQp/MnlaJWO5xn61KuW6TvoRW84MxXitGrRJjBKRpaafmyxwO1YVLWNYxgtS1JyRgjpUKw0k5akbsqrk8/hVKzdjV00tSC3dDKd3p3rapStFM57S9oeE/GotN4yMcWNuetfXZVKNPDLufL5jQtiLyO8+FFvHDpsZB5wOa8XMqlSrVZ7eAcY0bHpEDO8CknjHGa8V+47M9KXK4qw5FYyAn8eKTasQtZWZPqkZ+yKw5xUUpJSNp000itao5i+YYAHHHNXKK6mduV2EYfOc8+vFQkjSEOZXY+7+W33L1Fa04LmM5dhdJufMiKYGSO9OrSUdUaUow6kMkc0U8hEfHfiiFpKxlOnGN3EtWNxCM7h1GKyrQtsVRlHqVrwM1xvQdT6VVN2iN01J3JimIvfHpWcoxi7gm3oNRDnLDn6VNk2aWsieNegx3olCKBPUnjIR1Y9jWdkNtI07WdJphgiuhK1OxinzVS+pVSe+a5XGx1bD1bPVcn6VLVtykmxCpPRf0qWkPlsMKknG3mhxSVws2yK5t5yMqn40RcS1TbY23tLhjwvP0rbliQ4yTegraReNICqnGeeKpRp9xKMmWJ/Dd3JFvzjHtXPGUVLRmvsnIhg0r7P8Au2lJb6VvyQtdshxcdESf2PNJznAPcCsZNKWhoqMm
iMaPcK+Bzirjytak+ybZLJpcnlc+npWX2tCnRlYrxaXODkc/hW9lZXOfk5W0NlQwxnzODg4GKapXehbilEzIGllvCFXODyfSt6lLk+I5owu7ot3lrOYxx+lZqMTaMefRkUVpcAhSuSe1S4wNXTstBy2twDtaL9KzlGCWhVOk5A1hcsSFTj1xWas9yZwlFiJZXgbHlkfhV8lNFxg2iV7C7EeRH+OKxaV7BZ8xWSKaOXEi85rpVOPKTKCjK50vhFR52PzrlqwtqghZz1H+MIx9oAUd6KTj1CvGPPqUNTjzoEoYfwH+VNqLejKcIOmfL3im1eTxzckAf6w4/OvtMFGP1NXPjMVCh9bdmdVoFg+E3Edq58RKMY+6ztpQvojtNHtxFGCrYwK8erNvRnq0lBKzNe3jyeSPyrnlY1ajfQtwoBgZFSrGtOMWy9bEkgggCqjBLUupBQRcyBEV3D6UWT0LjNONhFnjSIgtj04qZwSepFRKCKUMgaVhu6mnKEUtyKcFJXFmxnCGhRsyE+WepAkyorZOcjvVummbPlaKmmTg3xJ9ac6bS0OKMH7XQ05WDZcMP8KlQN9b6kM1wPL2hucdKHSinqVJRtuN02QF8k/pWcox0dyqVLuR6nMhmwGHvWkIwtuc87upZkN1IhtvvdO1aJxTOicUqdjKhaMyli9aNpvRnFRbvZIuwSxrHwwI9TUSipHY6aauQNIXkySOe9Hs42OeyTaCQlD7UlCI4pSe5Wc7nwT9Dir5Ioc4JK5Hc/d4bOPatoxMYz5SmJcngj86VS3LsW/fWp9cfsMaOLH4VtqG3DX+qTPz3ChVH8jX9Q+D2GlR4RU39ucn+n6H+cX0jcbHE+IlaCf8OMI/hf8AU+3vgzbywfDDU8ZAkzxjrxX6dVilXppn89uUZYDF2fRHzR8cbDdqFy23qTXPmMFys+h4Xq2oxPkf45aP5bSSFMqwPavz3N8LCcWf0NwriINJXPAPBtwNK8Sat4WlOFkYXVsPXsw/lX4fxhlsKU4YiPoz+suBsV7bDOnfdfkbU5Cnrz6V8dTacrXPuYxUpWIzcRxoWaQAe5rVPWxdflhHUoyXFuZt0cynJ6ZroV+XVGFCopSstTlvilIn9nBmIwK68upSdbQ8vOIKyscJZedPKoRRjI5r6OUKdJXkzy6dJaXPQPDSxRWi+c2OPpXzuKqy9o7bHuYdQpI1AYiTtkH4Vwtu5cVGUtyjcn98SzD2rqp3gjphTjErX2q6dYxZurhV7YJrohCrU+BHBiKtOFWzZDbXFrdfvLd1ZT6Cs66qxXK0ac9OS5pHXWfhV1jUGftXFVxMXK56EsHeTdy7H4WIPFz+RrJ11bYyWEcnuTxeEgW3G4P4GlKukrpFrB2ejJn8JBhtE5NZQxL7G6waa1ZNaeFni4ExA9Qa1nVhKJmsHJS0ZbTwuuc+cScda5/aK50fVNNyxbeGCx+ab9aznVS2COHaejFuPCYZsJN7GiFfubexbjqT2nhlowFWUj1IrSpWp8uxgsLJO9y3/wAInG/Lzc4rjVexuqEu5EvhJVfKyc9q6IV7rUU8N5k8Hh5oiCHPvzTnUhJWsZxw0l1LI0iToZDXPJxR0Rw73uMbQpSCBKfxqfapHR7LQaugSwHeRnIrZ11OFjOVFx2PO/GXwM1DxPrx1OO62rnoK9LCZrHD0uVo+dxmVVsRW5uY6fwd8OZvDtqI5p87R3rOvjadWVzqwuDnSjys6OC1ZIhHnIHeuWo4Se56EKMrWJ4Ik3KSnQdqyaS6m8aSW5PIiXEflOvSsrcrvc0UF1GxWUKHBWtHUTREqcbjnsbUSfMmD1GazjNpXNlGKWhDe3WlWMX+kkYx0NaU5TqStE463JF6odok2magCbaAAHoRVV5zjo2bYdUpLQ000qAgl4wfauSFWVzpdKFtiS30jTUOPJyamrVm+pKoQ6IedK08Hd9nFKlUa6mrpwUbWJYtN04rt8pevcVdSUpLcinShzbEqaDpzHcUXB74rFTcTaVOmyaPQNLA6D6EVLnKRmqdIevh7SmGCBU88l1L9jSkTQ6BpcB3IRVRrS2uJUKUXoWItMsM8sPzpSqNLc0jSp3LCadpqDO7rWPPJlctNCSWtgOg5xVJu25FoX2GpBaKf4cGpnK/U0Sh2FkgtCMFFIrNSsyrwS0INlohyBj0rp9ppqZPlfQXzIs/KfxHFR7VRZPKr6Djl1y05xVKvT/lK5JLZkUltETuZ8n1qpV01YFFX1Ii0UXCseOxNLmiDlYT7ZCvXnNHMQpa3Ip9RiAw2OParhYtziyu+rWqNuxjjFdCszgqTakQXF9b3g2bRk+1aJcqumEZqWjKwFtYP5yJ1PNaRftNGy3aEbpEU/iSxUlWTk8dK0jh1JnH9YfNsSW2t2JILYHHWoqUY9DqWJioki63ZFuxx6Cs1h09yaeLvJjm8QabGucDntQ8PFuyKq4rsiNPE2nvkrEPxq3hVsFPF2Wor+JLIjAUAVLwKT3JlX5myOe8guo98Sjgfw9qToSS0IeIWxq+DZiZNpGea4qseWVjow8efVljxkzLcAkY5FZw1VhYlOMyhqbr/wAI/Kcj7nrVUqTcrMqzdJ2Pl/xQks3j2ZYyV+evucPCNPAbnx1bCSnjXqdp4a0m5MYLuRwO9eLiK9NaW1PXo4VwW50tnDcQrhZMj1rklKDjqdUKMpbM0YvtAHJNZRlTN/q8l1JohdNyHPtUyqQj0NorlHT3l9aJvDZqY1oSdrCrKTjuV7XxHfXjGNWxjjNdU/ZUkclOck7DrjUr+Pjfk4qYToSepVV1KmxENRv4FEzn6VTlRY4TnCNipP4zeF2Lg/L1rspYeE1c82riKvO9B1t4rS4bdnr09qdSgoHXSxEWtWKviCGCffn8azcOaFrGsasU7jLjx1ZwPiacAHtmrp4SVTaJyYjFKEhbbxSt+NlrJke1Z4ikqK1Rvh60aivcu22oXSDAb6nFcDdJnV7aUXoNmurmSQMzZI6VUVGxm4SlLmEuHupYSA5GRWbqQjKzNW5WsZyW13HMW80nJ69q19tTa0RjKm4LmRK8t2nrx2xRGUGtWTGU2V2uL5TuOQPcVsnTtuVaS6EbarO3yljke1HKkYKfJO5Vu9VvIR5pQkemaqHI5WbLq1HyMypPFV3cu0cUbZB5yetetDCxpw5mzyY4i83Ysx39+bbzMAcZ61xVYU+bc1+tVJrY+/v2UNEOjfBTw5byLh5LETPx3di39a/sTgXCQwPCmFpJfZT+/U/y18VcxlmfHWY1273qSS9FofYXwwtmi+F07KSFcN+Py19LiV/tUEfmNG/9k4qTe7t+B85/GiwElzO2MfMcVnj43iexw3XfJFHy/wDGPQEuLWVSnQHjFfF5hC6aP3PhnF8lSJ8mfEO3/wCEc8XW2ux4QRSFXP8Astwa/LeIculisLUp/P7j+neDs3lh5wce5Fe6vcOzKs2OevtX5bRo04S94/ao1KjlzIz9Qju7m2JF4RjqPWuqjOjTqJuNzLEOdeHKVdG03UIJxcS3e5Sfu1WMr0qmkYmOHpTodRPGOkX
mswC3jBAx3pYOosOm2bVqbxEe5g2XgTVrdgxJ46HFdTzCnUdmeQsJXU2bVvpup2wVc5x2xUTlQcb9TupUK83YuRLfBcFSCe9cLdK53woThqQXlnf3ERCEjjitHUpRtcqftEjlta8LazdTbnZnXPAr0sNj8PTjY8ivg61eTdjQ0DTb7TVzcK2MdKmtXoVfebOaFKvHc9atcBA27qPWvlHB31Pra0p+0aLcKDjDjrUVJ6WQoSadi1Eg/v8A6VHtNLGimTRoBwT2qOaxfO2Txrzgt+lVz3Qc7RKoUdHFQ5K4c82ToRxuNQ5IuMpImQLjk0uYvnZLGwU5Bx+VRJ3BTZOrgjJP5GkrFqUujHBwBwfzrRNInm11HKynvQ5pGkXF7DhIucZH1qG29Sm5WGPNtPWr5boqnK+4Rzh85b6VPK4suUmWISMfeHtSaSZg7ykN3HJBlH0ptpI0jRas7jti7QWcH2rL2rYno9BokgXgYP41Sk2TzNjhND1bHPTk0pK61HzMeskb8KePrWfNYHqOEKu4Zjx9KUqlkXDUzfEnh+LUovLB5xwQK1oYiVN3KqUFVjYf4P8AD95pShJGyvrV16kaupy0aFSlN9jqIY1Y/vH49K5Jy5dEd8WupL5NmvLyL+JrLnm0bxSkCrZOdqyrx70RUmwqRUUEiWiHHnD6VquexNNRb3HRvbE7TcD86xqcyKmoomSK3b/lv+tY88kQoxfUebeMni4/Wj2ja1G4We49LRCRib9aFJIpU49ST7GoH+t49zQ53D2aJY7VM5Mw/Opchqmhz20WMCcfnQ6jtZh7OPcjMKA/64fnScx8iAQxscecKlzGqavuI9rGOso/Omqg3CKIXhiBx5oq+e6I5EKI0UYDfjmk5lcqGvGp5z+tHOZuFxnkRnqw/E0nNjVJMhkhgzgsB+NUpsUqaQz7LaP8pkX8TR7SSEoRZDdabYEYLrn2NVGtU7l+wiykbOJJQY5x+ddCqVOpyyw+ug99OjmXbJKv51lPFST0NY0boqSeHLKST5pV/Oqjiq0tmafVKbjqTL4dsNv+tXH1pe2rLqc31WLeoLoWnqcGZfzo+sVktxvB046jbjQdMCZ80ZqFi619zSFCmyFdL0iIbHkHPvW8a1fmvczqYamnoVNafRrGJWEn611UnWrysc1WMYosaTc28to0kWMY4ya3k3B2uKnRTV7G34P+a8O0cZrmr1EzuoRjFk3jbcs4BPGe9Z0mc+LSc0Z18N3h+UNz8v8ASuiL982ikqWp86a3bxjx5KZCCS3H519NRU3gtWfI1sQnjmoo7nw+HMYRHxwOoryKyhHdanrUY1Jam7bqIiBI4+mK4KtVNWOxTUdiyvK53j24qITWzKTqMhvdXj06PzJcYrphS9s7IitNU43ZDp/ie01omKOQHBxWVfCTwr1McNX+suxoWtjbROCzJk89Kz9pKUdTrqUILUsta2knG9eenFczquLsghGCGy2ds67PNT6YputKLD2cXIy7vwvZyyljKnI7100sdVSsCwcZXYtt4Vs41wJkqni6j3ucksHaTHP4WsZThpxz1rKWNqR0RrDDR5TN1T4f6fctn7Qv410Uc2xFPYp5dSq7ljSfC9ppagLcJx0FTUxdWu7yMFg40Z6M0VW2U5LrjFYTqPY6FCC1ZFLdWcLckEe1aU7yKdSK2K/9s2TsYt2PrRUpVIasyTu9R2bdyCJhU020tTeShKGhKkMDn5nGampUeyMI04jZLS2YY81eahTkU1Eoz6ZbiTargfjXXCo0tTGdKMxz6LBJDsaZcYrJ4i0roPYJxsZn/CK2cM5cTrya7Y46pOKXYxngKaV0OutJtkjEUEoJdgoA9ScVdKpLE1owitZNL72cGNorBYGpXk9Ixb+5Nn6M/CzTBpXhTS9JRSFtrGKMLj0QCv7qyqisNgqNH+WKX3I/yJ4jxDxOZ16z3lKT+9tn018PraWL4fN3Romxkd8V24lp4mK6nxmH9pLAYmT2ueD/ABdsvMeZv9o0Y2LcT1uH6nLynzf8T9LEsMyleoOK+UxlJSufsuR1+WUT5I+OPhkzi4XbzyVr4bMocsmf0TwnilFxbZyfhGzj1rQorqQgyxExTexHH8sV+F59CrgsznBbPVH9KZNi6eOwCfVaMnvvDzPEyQybSRwc159HFSi7S1PR+rqexR03w/qEU2J7jcF6c131cThnC6WpnDB1FN3ZuQWSYG8Z2jnivPlXvsdcYRoaWHSwwL8rLxWcJSTvchQjUldGdcXFnDdiJkH0A611yjWnS5k9DOpUjTmoomNtA3KqMYzjFcfNJHbS1jdhHawLkNj6YpOc5aJinCDI5tPimY4Ax24q4qoiXyQgVJNORA3HQdK1qPkhZHOsPGUtStD8W7BQNoTn1NdkcsnVepxwzWGImy9B8V9NCB5FQY965p5ZK9kOeY0obFiP4v6P/EU/Os3llQxWbUyaL4uaMy53pzSeWVGbRzSla5Ivxf0c8F19uaHllQl5tTeg9fjDpP8AeWoeWVB/2tBEsXxn0sfxLSeV1RrNYMmX4z6X1ytR/ZdUr+1KZIvxl07HG2h5ZV7lLNIWFT4zaavULS/s2oNZpAd/wurTC2zcuT2NH9m1TSGPjN36E9v8WrOc4QqfTApPL6iOn+0aUVZEo+J8G7n8iKby+pylfX1KFxk/xQh2HOPbitKOBlcini7vUqt8WIIuuPyraeX3Z1xxcEhg+N+mISskwBPqapZZKTtY8/EZnCFayIn+NOklsi8GD1BarllnLpJHXRx3MrtliP4xae6DF0v/AH1Xn1MtfNZI56uYxU7Eq/FfTiNz3S/99VP9nzQ1mUIokX4taSOTdL+dJ4Cpcn+0qbJI/i1pYwwuV/76qHgJlrMItEg+MGnIc/a1/wC+ql5dMHmUYu5HL8X9Pc5+2D/vqtaeXyTKjmysCfGmyhPyz5/Gtp4OytYzeapsk/4XbHIfllGPrXK8A2y4Y/mY4/ErVL+Fri2yVHcVVPAJO0jaOZSTsisnxRukJ82Vht64Jrs/s+nBGk8wTjqLL8arUKEa55z/AHqUctb1ZyUsx5p2Q+D4x2rjcLnj/erCtlyex3zx8Iw1ZYj+NVoP+Xsf99Vyf2XJvRHFDM1zEg+N1ooyLz/x6tFlNTsaVc1gpWCL49Whk8tLvPr81XLJuSN5GlHMeZ3Lf/C7YP8An7/DdXO8rcn7qLqZktkOX42W68tef+PULKZvoRHMPMa/xwgH/L2P++qiWWOL2IeZq+40fG23Jz9rH/fQpf2a+w/7SQo+NsI63Y/76o/suTD+0ra3Eb44Rnpdj8TR/Zj7E/2onuxv/C7EY8Xa+/zU/wCzGCzJdxR8bGbhbkE+zUPK2DzPzA/GaUrkz/8Aj1X/AGVIP7RklcjHxiJPM/8A49R/ZbbCOZ6kNx8YIwebsD/gVP8Asxp6oKmZruRD4txvyLwYH+1Tjlt3sFPMU5bjZPi5CFy12P8AvqrlliXQdXM1GVrjI/izBIcC54PvQsva6EQzHme4+X4swRD5bsfi1KeWN62NJZiodSu/xbhd932vHPrVQy
9R2RLzh8th6fFlMcXo/FqcsvUyP7SW4rfFmNFybwf99VEsqjYHmXdiJ8XoWOPtX61m8simOGaa6Edx8ULdut9jP+1XdSy9ON7HbDGqpHUoah48t9RQK2pA47ZrSnhnSnexwVputOxu+HvHMRshB5gOAOc1yYjDzcmdscTTpU+W56h8LrsakDMORnINebVoSi3c1w9d1GXPHgK3QyO/NEXFLQMQpc9zMvyV0GR06bK1i1zXZsoTq0T5r8Uaxa2XjmaW5lAAb1r63DOUsHaJ8riPZYXFNvc2rD4laTCAkVwhz15rknldaory2O6jjY1UaUXxN07GTcL+dedUy1xlZBUx1OE7JkyfFPTApAuUP40QyyftLI6aWLS1ZT1Lx/p2pIYjcA59DXcsDOj7yHWxNOcbFPS/FOn6TMXjmHJ9ac6E8T8RhQrwormNX/hY9twTcjjoc1xVMByuyM55ipPckh+JNtzm6Hv81Zf2c29i6WOjJ7g/xLtf+fofnTeXOL1QVMdGEtxv/CybRz/x9Dj3p08A1LY0pZom7XGn4j2yk/6WOP8AarepgJKOxWIxsIxvcVPiXblsC7B/4FXK8v7o5o5ir7iy/Ea1bBa7H/fVOOB5XdI6HmkYIj/4WJadftgP/Aq1eFdrWOeWZRmxr/EO16faV/76qHgX2JePiRt45tZutwPb5quODkmXSx0WyGbxfZg7luQD6g1rLDTvYdbFwtoFv48hU4N0PzrKWCdtDCGNu7XJW+IMC8C5X/vqp+o69y3jYrqIfiHB3uR/31VfUH2E8dHuNHxAtXODdL+BoeBdiFjot6MlPju02Y+1qM/7VCwVnsbfXow6ld/HlmrYW6BP+9XSsI1HY1ji3W3Nz4Z6hF4r+JPh7w+swdrzWLePZnORvBP6V6fDOVVMbxHhaaWjnH8z47xJzqGW8D4+qnqqUvxVj9OvC8QQIingYAxX9q0ocskj/JrHzcm2z6I8GQBPAoBx80TfxEdvSqxD/wBrifPUFfLaz831PFPilbLJ5yg9GPatsTG8DuyWdnE+eviLpxdX445r5jFRV2frmT1rNWPmT416BhpJVTjntXxWbwitT9x4XxMpJI8G07WU8I+ILzTbhsQXQEkeTwHHX9P5V+ScV4RYnkqQWq0P6d4Nx1KnQcJvdfkaE3j7SRgSXKj2zXykMsrPofXLHQc3y6kR+IWkRkhZ1P5VNTLqiVjup4iDV2LF8Q9OZ/8AXr+dEMrqbEYjFU1TbEu/Hlgy7hKvHcV0wyySdjzoY+F7Gc/jXT7iUSblJHTmtKmEdOPKjf21NvmY/wD4T2JTsYrisll6lFs1ljFy6CP4/iYfLjgVH9nKLOOGMftNWQN8QIkyGYZI7U54F8tjpr4pSV0NTx5By8jDPYetZzwFSqvdRzVcypw0uVYPg/clV3XBBxXbHMVTm1Y4o5U8PUety5/wqOVItxuCePWn9eg+gPLvaMavwmlYZ8+sXjlcz/sppksfwolxhZx+dWsZBGiyuRIPhHOeftA/E1lUx8U9A/sqVyZfhFMMYuB9c0ljoNFrKpEyfCOccC4H0zR9ep3L/sqRKPhJKBua5H51lPHxTBZVIsR/CGYpvFwMfWiGPg9zVZY7E0HwakkGTdis6mZRi9EEcslfctQfBFXGftq5HvXM80lfY6P7NaVkdF4R+FNjYO3nurketTWxrqRReGy/37M25Phzo8khxGoP0rFY2SjY9iGCpRjYY3wy0lsKUU/hUQx0kafU6S6EN18LNDMDlo14B6U62PqK1mL6vSTPAfixpCaJr7Wtq+F3dq+lyms61HmZ8tmtKHtdEM8O+Cb3WYBNECeOuTWtaqlLVnHThVlojdt/hZq7cAt7cmuaWJpRR0wwNabuWF+E2sk4Dv8AmaFjKNjR5bVkia3+D+qSSYaZhzyCTXLPMKavYiGX1L2aNGH4L6kwCrcN+ZrGOPhe7O2GXTlsSL8D9WLcTn863/tCg0W8sk0WIPgTqUjY881yvMqakQssqIlT4A6mz4Nw2D71U80pcmiG8sm0WrP4DX6XAR52wD61zf2jFk/UKsXZHWzeDofCujCC4TJZcg1j9YdasmjseHeGp3kU7HwFBfaLPdvHhipIOK1rYlxq2Zlh0qtNuxxVn8HbnVbl5hIdu4966pYxpWRbwPuc0VqX1+BFx9xZTk+5qFjOXVmccDUqSsxw+At4OBKffmkszhzHX/ZUbDZPgDqrgmO4IxWzzWnFXsZ1cr0ukSaN8AtVubryi546nNclXMeaN0c0MJNS5TZb9nHVTjErfTmojmajE7qOWTnLUcv7N2rEEl3/AFrSnmkWjq/smwz/AIZy1MEhmb6ZNRVzKPQ5p5S29Bsv7OuoJzvb9aiGZx6lRyh21K8n7PmqJ92Vv1roWY02hyyrQik+BOqIMbz+ZrmqZiovQ4Xl0lKyEX4Gap/AM8+9OnmMZPU0/s6aWwi/BjXEfy1Sqnj6aZvHK5WFf4M+JFPEZxXSsfQcSnl0trDT8GPEL8FSKyjmNGMiY5TNasjb4F61K2ZC35GrnmVLl0B5U5O1gb4CascBWYfnWVHM4Ju4LJ5JkU/wG1iD/WSN+tb1cypuN0c2IyqUZXJYvgPqjw5jlbPWuenmUPaWZrTyqVrkY+BGuF9rSN+ddU8zo2LllMp7Cy/ALWVGTK2KlZjRcdDN5PPlGD4F6mp2mds+nNcn9qxUrE08pm9yNvgfq+/Hmt14Ga7FmVFwuazyp2sPPwP1ZV3LIc98VySzKClYVHKHcWH4Has7Zd2NU82gqbsbzy2UdEB+BuqxuW3n9axWbprUUsBOMLo1vDnwr1SC4AnkOwHmnVzCm4X6nFHCVnPU9w+GeippNusCcAAA14dbESqT0PfweG9mg+IBAuwpHGfzopXuPGJRaRQuIRJoEgH9w1MpSWprSlakfIHxk0u4/wCE3uPKlPLcAfWvusjqx+qK6PiM1g6+L5SnoHg/ULsAhmOevNd1fEX0Rzxpzh7qN6H4a6rPjaX6eprlniacI6lrCVKkrlqz+EOsSNy78+5rl/tCEZXsezSwUpRL0Xwa1gHKyN+dbSzGlKOo44GXMPb4Oa8xwJGNZ08worQK2AqWshR8HdazteR/zqa2OptXRzPKqjkPPwZ1hgBHM/PXk1lQzGnfU6KeVTiRv8GdcQ7TO351vVx1JxuYYjLKnNdDm+DWsFPluG59656WYQ9psXTyqe5A/wAF9eX/AJbsfXmu6eYUXEqtlk5xtcIPg3rTMVE7ZHUZNcDzCHY4f7MqxGyfCPXPM8syP+ZrqljaKp3N3llScRW+D2uAZEr5+tRSx1BuzCOU1ENPwk1xOS7/AJmtpYuhYmWV1G9Bf+FU60FyHf8AM1lDHUeazLWV1Yif8Ku1lhy7/nWs8ZQKeXVHoMf4W6wOVL/nRHF0GjCWWVb6DG+F+s55Z/zNX9bw/kCyyqRSfDDWMcO/51LxdFomWW1H1GD4YayPmEj/AJ0oYui5WMv7Nq30I5fhrrgGA75+pro+sYffQqWW1N7jI/hjrynczuc+prGpjaLdkS6FWMeVHq/7E
Xww1af9qPwzPfszw2LT3bg9AUjbH6kV9v4c1KWI4qowir8t5fcj8W8d69TLfD3Ecz1qOMF83r+CP038NZEiA+ozxX9QQ5nM/wA68ak4s+h/DKSDwbGGUoDB1KdaKyviEz5yjKUcBUi9L3PG/iPHvkmP+0cGunEK8DpyiVlE8K8d2RbfuODz0r5nFx95n6nlNWzR8+/GPRo5YHO3nnPFfG5pT54s/ZOGMU4VEfK/xV8LecZGVtrIcoRX5tmUHKLjY/oTh/MFDlb2PPl8Maq0p3F2APFfKfW4yVkz9SeCkoc8epZTwlqUowIWyelSpxerZzQp15SsmypqfhzV9HXz50YD61o8RSlK0WddfCVPZ6szjcSzuFEjD15rSM+U4IRhT1bO2+Gnguz1/D3MuDu7niuLESnfU6aFqy0Opu/A2jWk/ksgJBxmuCeIlsmehToJblKXwto8WSEH0zW1KcpPVmlXCwdmitdeFdINuZ/LXPoT0q5zmp6EypKNKxz11oUGSqKMfWtIYh00eNPCqcj1aWEGNdpIryaSi56n0mN5vatItwWRNvhieR1rCpNc1kaUYXRLFpmUOemKwnNJmjppFi00v5sGs5Vi1CLRKdKy4HT8Kl1bgoRuTx6QAPmH4VPtbGns0tSSHStpyFHPtUOqi4xW5LJpvTcMc+lJTu9SVFKRdttOH2bGB07Cl7TlZq4xSuT21iCmB/KspTdyIxTLlpYbcjH6UKcWbQo3RYFksPzAd+SKHO6sgUPZyLENup4x6c1lzNHRGVx0luG4ZeQO1EZe8bNaFe+URwOT/drWpHntYwad9D5p+N0Bl8TFkx96vr8lpyjQ1Pk80nGNbU6n4W2bx6Uuecis8fJe0sbYTllC6PQbGyHloxXqPSvAq1L6HtUrKyNa3sEEZLDPFc3tJLQ3nFRVyGxtl+2lSc5PTFU6bavc56UeeRs21pGkw+QY78VE9EdtOPLI04LONlPyAenFYc7RpJWdwtrUCc7RxUPuQpRehZFuFk2gde9DbaFdOVhFtD5wkznB4q4pA6VpmL4zszfkRYGABxXRhfclc58dT54WHTmDRfCzq525j4461o2qtax56p+xoPoY3gJxeQuwXhiTzV4h+zWp3YBynT1OkSAJNnaPauCVS+x2wUYy1LTQbeq9elYXludVtCWO3/ck5xxQp8zszFtK9yz4WtVN3uA5D8mumy5DippOsdStkuThc/hXFOT2PYilEnSxymOOlTBu5p0KlzZ7JOneqm1YxcrSsVrq1JTOMYrNM0eqK5twU5WtoOyJaujOvLYAkH0p2uzit74WVsGX5k7UW5Tq5E1oOj09ftBfYMZ61m5NoItXsWprBNowg6dxTjN2NHErfYVB4GPpUNu5HOrjktAWzt/CqbfLY0S1uSLbIGHFRDVlNWINbtFMOSMcd67IK+h5+Jb6kGmwAwnI7VnJcrN6NlAlS1AlyOv0rOUrlRmnIsPaqyYb05ojJpGs1pcotaJ5nAH5Vm02zOla9hJbRGIGzHPpWik0rXHNWkRtaKAQAOawb1KTUVcWO12jn8TU6sPdmx72qMhOPxFVFWd0KpG0SpDbKsjEKPxrodmtTlpwi2dX4UiJcL7VlJRTOymrGZ8RE23gx61rRaR5mNbdQqEH/hH5f9w9fpSnqx03+6PlD4sxtN49kCjjca+2yam1gj5HG1IQxl2bPg3TnRVI79qvEzib0nGo7nf6NYZiHGa8atNJ2O+mkdLoulqSGK8n1FcE5I9LDs11tYoziSMe3y1hKc7WuaSkoyLljYRTcmMYx6VjzSizog1NCSaZEZSPKX8q0lUdiG0pE1tpMO7mJRjsRWSk0zoWupDf6dEH/wBSv/fNdLcpQOaqJDpsBXIgU/UVz3aZpTalHQlbTLcpgwr0/u1rGbtuElYg07SYBcljEvX+7UO9jGCUpahd6TbC4O2FfyFaOb5bFNqEgk0yEAful57YFZxbT0LTUxl3pUCxgiFc/StfaSa3Mp2gyOPS7fZkxL+VZ3d7mqScSIaXAW5hUD6VUqjfUyVrjZdJtgM+UuO/FOM5dGXZEEulW4GPLX8qpVJdyJJFdtPtySphXgd1q1KTW5hNJakDafb7uEXHcYqVKSe44KMtSOfS4Uw4jGP92t+eTjuRW90rSW0ajoOv92lST1dzl5E5HqX7FOlJP8Zr3UhF/wAeejMA2Ohd1H8ga/ZvBfCwq8QV67XwU7fNv/gH8ufSlxrp8N4PCp/HUb/8BX/BPtLwvFmZAfUYr+m6TXMj+Asc9Gz6H0NHbweqySbituAoPGBSnriLpHztNyngJuTvbZeR498QIw0swzkZOc111fhsdOVy0jY8V8b2gJcjv0yK+excdT9Jyupojw/4nafHJHLuXse1fK5hC8WfqWR15RlGzPmT4p6YFuJVVOue1fnuNwkuds/e8gxadJXZzHhaL7bogYRqXgkMTkr0x0/TFflOb4Z4HM5Rvo9Uf0FkePhjMriusdGaNtaBpQrqMA/3a4ZVLLRnq0qcd7GX8TLS3OkZCgZXpVYTm9vcyxn8PU8sttLd5SQeCfSvp6fK1dniSwsKlNu56D8N9PlsogVlIPXGa4cdWjJWNcBhpUzoLi0nuZCXkOSeua8SpKy0PTlCXQrXWnMCFHUVpGs1Y6acW0QS6VJLAVOcEVusRdainTbVjJutEaFG5p+0jKokcFSiqcj0W2tTKFFeepcsmezWpc9Vl7aqbYwgyPUVk31OeMnCROkWEz2rGqzafM1ctWUfPK+nNc8iYbkoi/ffMPpU30Lt7xOEbbhl461LlqarYlii5Ax3oS5i0rK464g55HGacU0yGW4IyLXB9Kyne5bTcSazhJTntUSbuVTWhbtYyXwB6VUFodsI2iTXSEHHr6VRhUXvD7VCV5HYVE7WOinFOI8qfMxjGRWcfiNraFTVVC2r+wrp6Iwe7R80/GVwfEmCON9faZN/u58NnbaxFjtfhfGG0uMY7V5eYNqqztwEf3aPQ7CD92gK4x0rwZXctT36EE9zUtoT5e1hgEdTTejOirFKBWhtWS8HycHrW104WOej7s9DYskWSXjqK46rtojpablc1IUDR5AxWFmzRqTQWsRWdjircVymFveJljZpSB6d6zbsaQScx7RFQWxj2FXDc65WSuZN1ZNd3Dbuv0rV1OVHJUXMzH8VaJfahbLZiUhcYwK0oVUpXOerRlVjylvwb4fGkWnkuO3TFRXcqsrs0w9KVHQ0po9knPGPWslE1TtO5YYboQSKmavodq1iSIMwt8o6Vza3OepdJl3wgu66wwH3q6EpclzloL96deIeeRiuaex6kiykAKgFQPSpje5rDUoahEQ5GPrmqZjONpFV0JiyRzioBNlQodprWGxstjMvYzvbjqKq9mcMviF09CqgEelEm7HZTs4lgQlLkntXO2zF6SLU0ZaMEgcdaqDudMNUVJYtrHjtVnPU0mJbrk8jtik1c6I6of5ahge49BUU7ph1INdXdF97jvXXA4sYivpyARbcdqyk2VS0pEgGLjBHfis+hK0mWZF3RcjHHWneyO56w0M8
g+ZtwPxrNNnND4wljZeMY+tDkbVfIbsJGMdRWWtzJJsQKwHPpVJF0/iHsn7pgfTmtkVW2K1uuZmBHANa6NHNSXvM6bwsCsgUjnHWsLO52KOhlfElh9sX6itaWjPJxy98qbQfD8v+4f5VM22wh/BZ8qfE9B/wnkn+8f5193lF1gT4jHJ/XDpfBsBaNOPpXHinJyuehhl7p3+kW5EeMYFeNWbvqepBHUaDBgKNoPNcc2dlLY2G0sSDIH41ldm0oc2pZsLHy02FRj6VL3NoLlQS2xE2SPxos7Gbs5XJII1HJxzQlLqdMG3oRaha7hyO3StuZNWIqr3SO0iG3bj2rF3UjOk7MsC3JiOB0HBrWLujWpqivYxf6QcevNKzsc1O/tBbi3xcMxHSh7GlZWYySEswAH1pR3CjuF9DiEDHb0ptkV/iIYocp0qQhflI/s+HzjtzQZPcWa3+XcV59aqJo20VZbbPLL+NNPUzV2yq0J3HK4Hat47DqRsis0Z3kgYwemKdtSKbfNYbdxHyxheMVd7JmldNRM94TnHSqjNxWhyQ+I9x/YX0fGpeItcdPvNbwK303Mf5iv6C8EcJL6vi8S+sox+5X/U/ib6VOYc+a4HBp/DCUv8AwJ2/Q+sPCiD7VGT/AHhX79SV5H8X49/u2fRGnrCvg9JIZA3+iqGIA4PpUa/WOXzPGUYLLHKD6anjfj1N0shB53HtXfU1joGVu0UePeNbZiXyO57V4eKifouWTVkeN/EKzaSKUFcj1xXy+Nje5+kZNU5ZRPm74s6U6ysxQEc44r47MaVkz9q4fxCaSR5j4Kkaz8U3uhSnCXcPmxAnjevX9D+lfknFuGk4xrr7Ls/mfvXB2M990X9pfidHHAVkwR3718epNn6NSRz/AMUYyNMAzjivayxpz1MMbZ0Tg9LiTeBXsVNDwIStdHoPhO3Cwqy/pXiYiq+Zo9bBq7NgRorFj0rz6kpnfPkTsQXS+Y544HfFEbJGVOfLOyFjtwbcEp+YojP3rHXNGbqNp5kb4H4CuiM7VEctWCcrs7DT4sRg+1c0nqehL+Ix93G3mhQ2OayjqcT0kaFtGTbAHj3rKpds63ZwRZs4sHkZHasZGcYpMsRwkyEkc1F9C+XUkWE7jxxU7s05SSOMbh9eK1Xuo6OX3B1wpzyPShNHO1rYuW8f+jZ29RWFR6nRyrlJrRP3XI/OsZbkRLFgnz5I71onyxO1bBesVcqRx9KSd9TnavMs2SkR/MOe1TJtnQmox0Gyj95j9KI/EaPYraqA1pJ/unit3eyMHq2fMnxnDf8ACVY/26+0yXTDHw2cx/2g7z4Wqf7Jjbj7ory8xv7Zo9LL43pqx6LYRsY1LH3rxpLlZ9FSjFJGjNKbW2zg8jrisdJTsiqy9x6lC2kuruXgFeeDW8rUk4y3OSknubmj20kMeZDk+prkqe/LQ64SvubNqh8pge561m5cpvzJIIQFlYGocm0cz+JksKGSfkcZ6g0krm1BLmuWLyMLDtHBI9K1iXWujNtE3St259auUbq5MWm7kOpWrGcMc8GiKSRFSXLInt4QsY54Heoc+hvSXMrkV+mx844BzTT6mNWPJK4+JzJFjHGOKUtjejJNWJohi3Yk965pfEKstGX/AAaM3fvu61vF+4cdBfvTsiv7wcfWuWpqeoy1Gg2YpRNoKxR1SFuT+RFORlWWqKCjdCcjp3rPqQiqEBJGK0baRvsjLu1YSsMd6Iyu9TlcLu5JZRgN8xxmrfY0jO2haljO/IHGBUOOg+XmdywV+QHHWpjozeCSKV2PLwGPb86blqYVY3lcZaksMBeKTlY1pqyJGBDgEY5pRG1qQa2v7jkdAOa3g9Tlrq7sVtOUhMkcVckhNWpgxxcYOOvSs1EiKvI0PLBgyR2rGejO9L3TPlUrJ9elEFpqYNJMV1DJz7Up6Ie5EEIO3AqUluaxS5RMH7pXn6UX1M425xw5jY4PA70+bU1qrQq2WXuGGO/et3JKKOSkrSudT4bTEorJvU6k1bQxPiSh+0qSckHmtqVtTx8YnzkMMYfw9J/1z/pWbumaUo3os+WPijAV8fSZP8Z/nX3eVzX1KyPkcxgvrdzpvBkeETA9K4sRpJs6MPax6FpEY2g4rxqrdz04JHTaLEVAJXvya5JnXTR0NrEWQj2rM7IomWMRDgjp3oB3ZHNC0h3oOnWq5+UpU1a7CGAxMN3pWTcm7F8ySHTRrJ8v48VaVlcStKOpVaF4ZOcYOMcUrqWphJWehaWL9ycnPHNOL1sauzgU7EA3ZX/arpS0MqaXMTXaATEleMc1jJq5piFsRxxiSTb78cUk7EUVqM1CPAI29BUJ3JrayI4IjtB21fQcFaBG0YLsMfnUmSSbFmjIXJXtWiRrUjaJXZPk3EChL3jGCTkU5IcA5HTpgVurIuqroolD5hB9RxV3Oek1zjrpD5IyOMVLkjorfCZ5j559e9KL0OGLPpL9irRxa+BLrUNuDdalIc47KFFf1b4OYV0uEfaW+Ocn92n6H+eH0ksd9a8QalP/AJ9whH8L/qfR3hS3H2yIE/xCv1yiveR/L+Pk/Zs+jtNSyl8KwosCBltMHYuN3Hf3rmqKUcS2n1M6UaE8rVoq6i726+p4r45T/SZdw7ng16cneJ5uXNcqPJvGNsWD89+K8fEpXPvMtnax5L44ssrKNvUHtXzeMhe5+hZVV2Pnz4saTujdivrmvk8wp3R+wcO4i0kjwPxA8mi+ILfWYRg20wJ916EflX59nWFWJoTpPqj9pyXGPDVYVI9Hc68sjv5iNlWGVPqDyK/JIrlbi+h+40aiqQU47NHNfEx92nDPp6V6uXztU0OXHu2HZw2lDLjPrXuVHeLPCpp2PR/CMObda+frRam2z3cDG7NeWDa3C/WuSb1OqtG0xi2oI3HnNRuOlBXuOFudhT2pKXLI62tDPu4AVJxWvP76ZyYle47HSWOfJB9qmXxHZLSpIffg7gR3706Nupyte9c0NPQtbAOayruz0NU1Yt24/vCuRvuOJYgX5v61D2LsSouH96RoSeX/ABgdetDk27Gy1iNkRmb5RwfWmmzO1nc0LdCbYAelZT0epXNdEttGQm2odxxRZsY9pJbrVWlY3c1siG5fdckdxTUHYasW7YnAJGOKcvdQS0QyVf3vGfeoT1NW7orako+ySY/u810LZEdz5o+NSL/wlO7/AG/619nk3+7nw+c/xzuvhYpbSUwP4R1ry8xX71no5Z8CPSNMXKJlegrxqklFHvqVkjRubZpbfB9OBXGn7xTXOhuk2ojdV2cbueOtbtXWocisbSQBQdq8duKyk0loNKxes1/dEGuaVylZsBDumJZc8dKpK61LlT0uSQxhZwh4FO6SsFJqMh+ozI0e0cYoTkzSu7rQoWGTcEMO/StJcyRFCKTLGrQAgOorFSbdiMQve0IoR+6yOlLVM0oP3Srqe7yySOR7U1J3HXjfYgsJpSgVyOnFVOTWhFKUYF+Mny2TvWfK73NKvvRujR8Ggi7wf79dCj+7OOlpVO18vLHmuSex63YsxFVUZHPp61EdzoS0Kt+
m6Mj07CiSZnUimZYTaSDUnMtyFkw5AXjvVTeh0vSJmXKgSnNZxu2c8gto2lYqPrXQ5KMdRwhY0VgULlhyVrBtyZrJpbEkcYKYI+uab91ChJlPVLRVUMByD2pRZUtRlnEAMKOKfLcy5tQuFIYqD9TVQvsaxdyvqoJgyR/COK2ppp3OXEO0irZ5WLAPWrk7F6cgwlWnAb161HOrGMW+fQ1UB+zDjnHWueTu9D0UvcM6dSZCaqOiOa92IAxXB/E1nJXHKIIoGC3pxSadjSm9BrRNnn9KhExi+ck8jEBHtxxU3szZlOwjCXBPbNdkVzQRyzVlodN4c5lABqXZF0dTG+Jhxc5963oL3tDz8w0kkV7Xnw9Jj+4f5VlWlqVTf7lnyz8VXH/CfP8A7x/nX2WVP/Yz47MZXxdjpvBAzEmPascS9Tpw2yPRNGHy4AGcDFeLV3PThudXpMfy9M5xXJI76aN6zUqpwPpmpZ1pIeIN7YYdfWplK2iLaWyHiMKMe9ZxjKpLlirvsJu2h33w+/ZX+PXxSu7GHwX8MtTnXUifsV3LbmOF1BGX3tgbRkZIr6nB8G8TY2CnTw7UW1q9EEcLiq13GDsjQ+OH7J/xW+CuoJJ4g8HXMGn3l79l0qWSQPJduMLlUHzYZgdvHQivQzjgjOspofWJRvTbtdO+p2vLcVQpc0tbbtHmWuaReaPdy6dqdnJBc20zRTwTJteN1OGVgehBBBFfFTjKE3FqzR58rLchXHkfUUr2dxLYoaejfb2z/errg7xM4O1SxZv4yXbnqa5m9TWuhdPgJP8A9am07E0UkR6gm+Ug+tQiJWlIYkexPmX9K2lsXJWiQRp5kp3fhWaMI/EFyu0cjBArS9jevpAqzKdhAP19qIvU5ofEV3U7CdvWqk3c1nsZ6xu8x9DVp+6YRjyyuSXMY8rGO3OalO5pValAzZIwuSK0iklocG6PrT9ljSv7P+FelKVwZkeVsjrucn/Cv7R8OsK8FwdhKbW8eb73c/y98Zsw/tDxAzGqv+fjj/4Dp+h7l4Qh36hEB/fGOK+6pr3kfhuYStSZ9Cp5ceiIRbbQ1sM7DxkDqa4226u/UnnjHLVJRtePQ8Z8dwv9skJIzk8gV6k17p5uWS9xHlvi22B38da8nERPuMvnseWeNLQssnHUda8HFQbR93ldTVHhnxN0wyRyBh69q+Wx0bH6tkVflkrHzv4/0kJcSrs4Oe1fD4+DU7n7JlddypxH+C746l4fiDH95bEwyZ9un6Yr8jzuh9UzKfLs9UfuXDeL+sZaoveOny6Gd8SY/wDiVjvWeXSvV1PWxqXsTiNGX96oPrX0M9Inh09T03wen7hcj6V4OIvzM97AGvcqRwRzXC22zpr/ABBBEWizisnLlbFSaQjJtUgjvxxU36s7I2ZQnjyCSOh7CrlK5y4hWize09MRDPpV1L8x01NJsddOCo45HtVUk0rmKSaNHTCTa9O1Z10hW0LtooLYNcctjSO5bijy/K/hU3drHQ0h7AbxzUttCvYlkX5QSMZFQneRrH4RChMYyK1joiaj7F225g+7xWM3eQQi5Ilt9oHHNEYmzVtCSOXbnd09RWqaQpe5qQKVec89+tNy0Kppz1ZegXGB3A5Nc85XRrOPujZgWf8ArSi9RxINQj3Wkh/2a6L7Catc+ZvjaCviccfx/wBa+0yZ/wCznwuc3+sHc/CbLaVH9BXmZimqrPSyxfu0enWKhLZVK4OK8GpK7se02tjTs1EkRDL9Kwsr3NYSaRNp9uBNnHGetOVV2sPmbZpupYkYx0rBu5qotk0W6HK4x0oauGzJUQEh8U3JctkbT+G5HGHkmPrntUx+GzMqceeZLcW2SFkOPrWkUVUg1Ipoqx3O0DBz1rbRolS1si5cgSRAMB061yy+IucFYgiQKDkAccZon8JcVaNyG+VZNyMBzis4pha8ioIBCgyuOPzroUUtTKpFJ6E9kQ5K/wA6cnZFwi5o1PCw23xwMfPVRleNjnUbVjtkyG6fWuSpueolZIsquFAH51mtzdP3SCQBoyMdqp6ol6oy7hDFKeOvesznatIheMbScHpSk7s2voZVxHvmIFVB2MZJouaZpdxcTJBbQs7t91VBJP4Unebt1FdQV2eofAn9lr4p/H7xRpXh3wXobeXqdw0S6hP8sEQRlDszdMLuGfrX0uRcJZvnic6UbQW8mNU61WnKpH4URfG39m34mfs/eNr3wV498PTwy2czLHciE+VcIGIEiN0KnHBrfOuEM3yafvw5oPaS1Xf5M7pYOpCnGotYvqjz3VLJjHkj6ZFfJNOErM55qxStYmQYI5703K6MYx1GXaEOc/rV09maJWdynq7ZhCjriuqiuY566TZTswfLxU1JWY4Jcuo0Rf6SGYkc9KzXvIm/v6GurHyNnTjvWcklqdkW3EpMPn5HHrWSZztNO4MpUEnv2qm1Y3klyjeRgd/Wjczp3HFCVzxU6JnQl7w9RiNl9PWspK8hVNEUoIyZ2PfdXZDSJzXvG50nhtMSgiom9UaUlZGJ8To/34B5wa1otanlY+7mV7YAeG5c/wDPOsJ35iqd/YM+V/imP+K/fjPzH+dfdZSn9SPjsbF/Wm2dV4HBEKfhXNiXqzuwy0R6Joa5+Ujj1rx6q1PUprU6/SV+QZHUDmuKZ3Q0Ogso8g4HpmsZao6FbqPKqJdh4J6ipUerLv2Po39h34NeDdXk1T40/EHSrfUNO8PTwrDYXhxDI7N8zN6hVDMB3IA96/oDwd4UwmJpTzPERTeqjdXtZbn0GTYGFRurUTd9Fbv3Pp4/8FFba61P7JodraWul6ezLbRWduixxuoISJemMkZav22eW4BR5bt38+p9JTyjDUab523J73Z5F8Xf2+tO+I/xF8M6dqniOzbxhbTSz2WuXUBuTYXMvyiQRsdpkUH5SQdpIPWvNznDYCjlUsLRtFtaeVjz8fDA/Vng6N1B291abdDwX4//AA9+FvgA6pptz4p1LUPE3niTETpLHDubLPdSgsDNLywjU/KCM85FfydxHlmHweKqONRylffp5r1Pj3GHs23Fxs2rO3R2T0b0e6623Seh4+02ID0r5eMJPcwjK6KOlsXvmz/ertjHlgQo/vLl/VF27sDvXI2uYusx+mL+63kfjTcrBR1TK1wrPOc1KZk/iHyoViOPSqlK5rN+6VoY8NkjvQc8dHcbdKSpP05oT1LqvmKkiEr05qo7mcNyGdcREdPWnJalTM5AQ/A59cV0QWhLRJdEmLBHIFCirky0izKulba2D1FVFc0uXucFaapUXN9E39x9s/BrRjpPgrStOC/6qxiBHvtBNf3hktJYbKqFJL4YRX3JH+RfF+N+u55icQ/tzm/vkz1fwXATfxEDHzivZpu8j85zKX7po99haJtJjjIH+qAbI9jXHKMva3Xc9DDwpSy6MZLXlseP/EG1EV/IFPGTjjFetfmijwsC1FuK6M8v8UW+Sx6H0rzcRE+wwM7WPM/GNqy7yRjPt1rwsTE+3y2pqjxn4iad5gclfXpXzWMgnc/S8mrWaPnz4maT5czsydSe1f
G5jR1P2LIsRzwszi/AU4svEt5o8jYW6i8yIHpvXr+hP5V+X8V4W9ONZfZdn8z9m4NxiVZ0n9pfiiT4jMDpmD2r5vAfxlY+6xz/AHBw2igfaBn1r6Ccm1Y8Wgrtnp/g07rdPTvXiYu6bPeweht3EIk4HT1ry1LU6J3lIWBCsRXHSqlFbjceWJEELhgtZt2NqL0KV7GV4FOLuzDEu6ZuWuFjBLdq2qS947K3xMYzBnAxyema0pv3dTmjF81zX0yM/ZjgVzVZ3ZvKOly1ZKd+AO/XFc71Qobl6IEEnHPrUtWN2mP8os2SetZy0ElckKFkAOMipWkjZaIWRCEAH4U9WzNq5ZiUiEAk+9VFO51RhamPUhE47Hir23Mk0ndlaS5aRmRW4Jwah33sTf2tQs2sAiGW56Go1kb35VYuwHcc4qJq2hb+Ajk5fG7OP0pQ+IcdGR3qj7NISOdhrp6IGtWfM3xzXHinP+3X2WS/7ufD5yv353XwhTOlJj0rzcyb9qz0cr1gkenW6nyE6fd614E/iPbkrI1dPGLfGPxrFgloWNLXMzFl70nFjp6yNCJS0+GxT5bHfBKxJeDYMjtii1zmraSJLVzJD8opTjrY6YWnALMH7V5bLxmptaOoQiozF1qZoRwuOetKDuTiE+hRso5bqdtxrSpOUFZGVOK5rsv3ERVQo7Vild3ZpJ3ZEUbGQKc9jWXwaFSVGaTB6Y61MWkjKEmQ3kEoXAOPrTU9SmuYn0WAKx388dxQ7thGXLoanh9f+JmQBxuFWtEcsZXr2O1GMgdOnNc82z1X8KLKjMZBH4Vk7otPQgVcjkdKE20KL1M/U4irkYqrEVFaRWXmIgmoadynojLlhZbv0BNaxi0jKTvsdn8NNO8Zafr1l4p8Lz3NlJYXkbxarDGcW0oOVJboDnsetehltCt7ZVafR7i9j7f3JLQ/S/4WeK/+Ed/Y9/s7w1o9nYeLtSa51C6udPgCLeoWHnsoHEZY4YqoA7gV/VHC6XJTqOK5OXVW+13Pq8swtOji4VJNOmkly+fR+ZyHw9+P+meMX0zSfilJbarY6fvt9Vt9QtUlEwIZUiYsN2xSQcgggnuK+srYOhicPVhyr3tl+Z3YuFOaqQjHl5trfn2u9jwv9rX9mr9nvRfDdz4p+GfxjsbjxEIVu7vw3a2Rjt41b76ROTyVPQelfjfGvh3TxWHr4/BYd0eTW117yXW3Q8itgauIpSqex9morrJO/n/X3HycsKhskc5r+dkpbHgxaILyMM/I61vBWiVN2VzMv4mlj5bgVtSk07M5qkk0VIsQLhzjPetZxTM0pNCS6pplpIDJIM+hNP2b6ImFenCfLI1La6iv7TzISMY6iuWqpKVj0IzhylQqfN254rKxLs4j3XII+maGrBe6K5RgwHbtVpaChoTKCUwPx9qylpI0hJuY+NMRsO+Kyk9TSrblKdqhM7D34rrhfl1OWC0Ol8NgeeAQOOtZyepvFaGN8TYwbkfUVtRWp5WOj7xUiTHhyUH+5/Spl8RVOP7lnyt8UAf+E9fP94/zr7nKn/sR8hmH+8nV+BlxEmB3FcmJvqdOGPRtDTaBxxxzXi1XuerCyOr0lfl247DGa45O7OqGp0dgcIQBk46VLVjqhFtEogXzNxPGe/asas3yNI2jZH1L4LupPBf7I8mhy2fkz3d1DcW534aczFl6d8BQB/vGv6/8OcHiMFwlQjBPmkvz2Ps8LWVDBUXDzbPn3x74/wBL+C+iXOr6r5iW+iWsq29tGQfteqSkEKR325JNd+Oxry3mjNOLV9LdW9dO99/M83NM6q0sPKpzXb0R81fBfxX4s8XfGaDXvFF9MXa6af8Adv8AOF68A9+mBXzjxteVOdao/Q+OwuPxFXE+1kz6C+J+u+ItcaFbqOGyslzJDo9qDtjPeWQnmSVupZifQYAxX4HxDja+NxbTVld6L8zR1J1puUnds5NpN8ZxxXza3OhRUUQaOcX/AOI610WvC5zpv2hqaoNzEY4rz3uy56k+mxAWxGKbWhUFywuVHTMuSO9OKbVibXkPnB2ciiUbFNakDRkHp1q4pJEzSRFMu4HP4VDfvEW90qSDjOK0huRH4iG5UiMkDim9y6mxnpGd/I963j8JDauLcJmIg/kaUXqKTWxBpmmtqOr2WnKCTcXUaY+rCvVyPCvHZ5h8P/NOK/FHynGOOWWcLYzFP7FKb/8AJWfdvgyyWGBLdBwihQPoMV/dtKKhHl7H+QmaVXKbk+p6H4Ih8vU4mwMqwPSuyilzHyOPqWhc9se7W8s9wCq7KPurwK5eRxlZHq1MbDEUOZKzaPK/iDGXuHkY7juOTnJr01pBHz2AquU3fe55f4mt9xdc1wV1c+xwU7JHm/iy23K4Zs49a8TEK59ngJ2aPJ/HNgGVzt49u1eBioJo/Qsqq6o8J+J2kGRGJTpntXyOZR0aP1fIcVyHiesvPoWuwavGCDbzBjjuO4/LNfBZlhfrOHnSfVH6vkWMdCvCpF7NGh8SJYpNP82I5RxuQg9Qea/OcDCUa3K+mh+xY2onQUls9ThNJl2zj3NfSKmlHU87CwlO7PU/AvNogPpXz2NSUme/hlqdD5ZfOa8mWjOlx94Ux7IyO1aSehVaNooht1GCzVjJhRWhUvkGCSKE7GdePus17eAPAB0yOtazl+8Oup8bQ1LfEmAOe3FbJc0TKrGy0NvTeISD0x61z1YpO5MG2tSa0B8wjPesm0kXH4i6GC8r+IrNts3lK5JDyCSO/asp7hElQMZMY6VLRVwlGcL3z+VXAuMbO5ZC4gwBz3rbZHVJ+4Ub+/EK+TEcnPQVmtXqcE25OyJdHtJJCJpR17VMmtkdEFyRv1L04Mb4UHoM1UdgTuyzaYVcEZxWFTc6ErxImB8056npRAq3UbdAtbuP9jrXT0Qktz5o+O6lPEoOP46+yybTDs+IzuyrHc/B4Z0pDj+GvLzL+Kzuyr4UenW4P2dM/wB3pXhVNGe9NaI1NPObchelYp6hTSZZsSIpTvPBParlJ8ug/djLQsRzfvOuKhzk1qbUql3qOu58x5JJ4qU22KvFt3JtEvk2FZCD2FObaNcPKMYliBc3JZSDUSnJQsVdc1yHVna5baPXnNRG6WpDbnIXTYjC/Hr1rW11cLLnsT3bkjPf3rOUrbETspEYGU4HFQ22dENaZVIKyEkd+lVbQwXxDbwExggZGMgmpimmbok0kFCcjAHetm7IxluX/DYDav8A8CoSbRy02vrB2zJtO4+1YPc9m6ZZQAx9D7VjO4m7EaLhyw/lSg7McGUdVj65yeBWjY6q2ZnxgBCT3pN6kSehRkyLjcRnBq27IzvbY9N/Z8Hiy98faZpvh3xFLpceoyG2kuFAaKcnkRSo3yupxjaQa+k4ZwdbG45U4ysmdlNScLn3H8efiN4R/Z/1r4Y+CPF9mdPa70VpNWh0qQxrDJOSUcLyEQcEg8Y4r+n8lw8aOW6vrZdNjbB4qv7Jyi7q9lf8THl+Hfhy7g1PxTbSW1sIJd0lvG+5W3jPnI2MFCOvpn2492GLXMlbpuevCtUnUjTim
2z5E+Lqj4f/ABG1g3mmvPFHZrBYNeXZwzyE/wCrXILADnkVHFeJWD4YxWLm3pBpK+l2dGNxapUpSm9WrHl8gzkHqeeO1fw8m3K7PkqcPduyKZdynJ6dK1NG7qxmXWwSeTx14zVJpHHzRjJpmNr8r2doZD8oKn5iK0Sc1oTVdqbaPLNa1/ULzVjbxzEjeMMK9aMKcaOq1Pnp+0lV5j1bwF5zaGokJ+51PWvIrcqdz28LKdSOpoMmHI965b3kdyXKrCycLnFE2b8vuEDEnAHrUxZjFEkXA5HXtSmjaMfeJ1X5GGOo61hZ3NKiumUYlIuGGOM9a7IbHHF2VjpPC+1p1DHGKie5001oZfxMjUXinI5Irek00edjV76M8bf7Bl7fuz/KsqmkgT/cux8r/FMD/hO2IP8AEa+1yl/7HY+Lx7vijq/Aw/dJ+Fc+KTTudmGWiPR9FUgDjkDnNeLW0PUjsdTpfyjYOvBzXPy6anVSTZ02lwkrlv4h1FYVJdjthex0/gX4W+Pvibrn9i/D/wAKXWrXSjc8NrDu2r6segFellGRZlneIVPCQ5tdexpGjWrS5aaufQfx08Pa5pXiHwr8M9XgntLiHR4DNE0e0QMics3rtG4/Wv7Ty2ksuyGjSmtYxX3o+xkv7PyyLl21Ph39rrxhazeKpLrStNEul2TtHZG6YskDZJe5kXgySsegGcDHpXw2ZYl47EuXT+tT89zDGRrVnbWJ5p+zhrKz/GKzuwHu08wAXEybMfQdh7VhWpQlg5uOyRx4K9WraGx9KeLtNutbjuNW0TSJWt43P2jULhAAzf3FLHn6CvwfNMHWqVJShHrv3PUhFRlynIhMR7n7CvmU7MU5NOxFpKj7cfrzXUpXpkU1zO5rXiF5QpHOeK4HbUp3uXYoxDaZx161V77nS42pGey7mPcZP4UQ0MI6yJJIwIwO9EmazVkQvkDJFZ3Zg1cgeM4we3tS2HbQp3AO4j6VtBmS+Iiuh+54H1FH2hz3KESFnwPrnFbr4SZJXC4TGVIxx6U49zKW5s/CTSv7V+Keg2ZXIF8JHHsuW/pX2/hzhfrfGuEX8rcvuVz8j8csweXeGePnfWUVBf8Ab0kvyPtnwjAWiVuhIr+zYs/yuzGa5meg+Bo1j1WIsuQGGRiuujHmufKY53geuXCSJZvHDgblAHesI6z1PQxLqRwzjS0ujzDxlAwmcSLgjOT616NvdPFwL5XY828SQAFgPfmuCsj7DBzvY878UQHD8Y968bEq6PscDLY8t8YWjkuMYPpXh4hWR91l1RKx454/0sSK4I9eK+Xx1LmTP0jKKzVmeD/EPRyHkUD17V8fWo++freR1lJq5zGoag1z4OjgmfMlu5ibPoOn6fyr4TMMJHD5tLl2lqfsuExLxWVRu9Y6HPaRGRcDnqe1bVLKNrnfhJ80LI9V8CqRapn0r5jHP3me1hkdKhAOT09a8pq7O1R/eDpAfLOfy9aJdhYjZFVMhSFH4VnLciiVb0EIcn60InEP3WbdqAsYU+mK3cFKdy3U5p3RLHAxYELV3UFY1nqjStF2jHr1rmqTuZR1ZPFGF5HXHFZNrlsaJWehYjDN+FZy7G1OPMyeMbEGPyqZWKasOTIP40uli6ceZj5SVIb2raCsKpbmSQXuoLb22Oh7j1pOPMx1alocqM+wt3u5vOkOR2zSlK2gUoWV2bVo4j4xgA9qXLyop6q4skoaXaxFSmTBcxYgzjjgYHNRJq522UYg6rvJHepi/eE2RyD9y4I/h61u37qBM+bPj9GB4iU9Pnr6/JHeiz4XPH++O0+DvOloPYVwZl/FZ6GVaxR6fFkWycfw14NXc+gnblL+mSMIuawSuwpLqWWLI+V9jmtlBNGctZFm3DSgSA9etROFtjppxSRdFtGItpANZxjZ3KrR90ovCIpv3fyjPaupOPLYwiktjT01mCEsefWuWra2h0Qg3uKYFaQu4HXioSuU1yahbg+aexrV/CKGsri3CFyVH51ildiqRfMC5VAGAocFua09NCrMpWXBGPemmrGM42kNeMlBkUXNou8SazQRk56U07mL95k/hjzTq2FXjdXQ2oQuYUqf7+53iQgYJ9OledOTbPWukShty49uMChJtEvXYckWTkg89azejEtGVNUiwmfQU+YqbujIlUqSuKSbZlK9yrMqtIOCfm7V0JLl1KUF0Pev2JI01P46eGtIg0eG5zq0QVYoiVbJAIlUjA4JIYdD9a+w4S9r/asXCLtbex6EJ044WfO9kz1r/gqv47hu/wBqbXrLUPG19pWm6Vbx2V7BprMjXFqkYHkZUHOSAMYxkgngcf0tTpxw+R0HKN/teafcyjajk9Fw66v7zf8A2NviZ4M8beEhpfhS9v5NItQtvHFq7hrqzzwUk6F1zznFdUcU6qi1vYv61L2V1ueBf8FG/CGk2nxS0LXbeNg0Vy9sAE4ZvLznOOnUj61z8Vxni+CMTTau7G9bmng1KerPEWQgbvzr+NZLllY86LtoI8ZaNsA89KcpWQ7KRzmqJcfbS+Mbf8aqFuU4atNxndEeuJHqGkeRs3HaRtxW1FSvcio+enY4fT/h3cxah9rnQ7C2QD2retW5vdTOCGHs/ePQtEhSzsxbxjAC4zXDODR6uHjyxsKfv59aw2NZS1FxuGOMUpNtHUrONiMrhixBJognY53pIIs7ifWqexvB6lmOM+UxwelYydmby+EoRhvtDL2Jrog/dPOUbO50Hh3KzCs5XbOqDRlfEckzISeR610UUtTzcbfmRRU58Pyf9cz/ACrOprKwJf7Oz5b+KKH/AITth/tH+dfa5SrYM+KxqX1k6vwQoWJM9wK58Um2z0MKro9J0FSzDA7V4lXWR6cUdRpCIzDAzjjmsamkTrpJ20OstLeaG0F6I22dFbHDH0rnjTlPRI6veWx9r/sxXsnwW+ANqY7WTTtT1+Vrm5uIn2XN2qYIiB/hjxwfUnjnp/Xnhpw3RyvJIOrFKb95t29ba+X/AANT7nKKGEwODVWtG8n36X2OK/bB+J+v+HdBk0/xKou/Eup2hkcPiY2VsVJSBTjKyEcnngYr188x9LlcaTsvI+U4gzZ1ZOMHaC6dz8rvjLea8PHMt7r1jdz28zlzb3HiBZIxz3jiIKduOor8+k68qq8+zPzrETlVqpR0Rvfs1aZ/xc2xktlQKZQ2xmOMenJr3qlN08BP0PsMuhGjS1Ppn4oaNqutzPrHiWfUZFgCixVohb20K9gqnBbPqBzX8+Z5Kc6sueTtcULSldM4l5n2bX49q+V9xsmV7sdo+ftpfb3rsX8MdF+9Y2ZBvnxjjPGK5GtToULT1Ls/ywBMc4qG+iN6vwFCNDn8aIvU54qzJZh8uPUVbLm7orSYxU8tzJK5E5BzxxQ0S3Z2Kdwu1yCOe1VBaCjG7uQXH+px0oXxEztzFWFArgsPwrqfwkyGTpufHepTsjJrU7r9mfSftvxTS7ZeLSykfnsWG0fzr9c8FsKq/FVSq/sU397aR/Nf0ocw+rcC0sMnrVrL7opv/I+vfCy7YVHpX9VQP83se7yZ6D4BUS6s
q+Xu6cYrrofC2fL46LlFJdWeq3Muy1bjnCjAFYxXvHp4ut7LDtLfRHmnjPfJPIzsSQT1rutZHiYN63fU848RR5LE8e1cVY+twb0RwHia2yXB9OleTXjc+twU9rHmfi+zdt4x9Aa8fERS1Z9xl1RKx5N47tFhDGXqc7R6183jU5n6DlNVzaseH/EbSZMtMY8H+7618pi6ahJs/VsgrxUrXPJfESXNjHcRCP5JQG+jCvhc7pqpUjUXTQ/X+Hq8anPRb3V0Z/hmJpZwZSQc8V4+Ik+TQ+tw79jues+D41S2UL0r5/ENvc9vDS5nc6CNcHkDmuE9BbizkCPpgkcVk3dmVd3K6AKpyO9TLcVKNkU7s7lYEfhTtYivbkZs6eDOVc/dIziuiclDQunBQjqaaKgXpjjisYqUncpvm0LECHBIFYyVmSlZlmNdqg+o4rNs0VieCMhsnj6UX0ub0HYlK4OO1ZNhLWQICOetbU1c3hZRC5mEEQkbqOme9Xd3sjnrvl1M4GXUbkAk4Bxirm3TVmtTOhB1JczNe3gW2t9qisEru51TktkT26EoTmpqTLdlGwyBD553NnB7ik9gilBGjDDlTj09KxloaqV0MdMMQSdvrThrIFdu414w0LfQ1tfQo+bP2gSP+EkCgdH/AK19hkelA+Czu/tzsfg4caYn+7XBmb/es9LKvhR6dCGaFVJHSvCqWTPoJr3UXLBtgAP4CoWrNKVlEsXMxjjVs846VpGTeyMqu5b065Vk3EYU9qmcjopfDqWLq/8ALiIB5HSsbXeg6t2jPtLma6u+e5wQa1qNRRz0YtTdzoLRFhiDE9O1c6vLc74tNhNMASM8ZqnKysRVd9CKG5XeTt7+tKUu5NKyepL5yg7mwT2zWXM+hVSVw3oxxgVMpMKbsVr1G3ZUU4MqUL6iwLuiG4cnrTabZnflHRIxkK84PpWiaigiang+zkm1oQxRlmJ4AGSaicnJGUHy1T1fwt8JvHHjW/h0zw74curmSWB5h5UJIEajLOT6DHWtqOBxFf4Y+ZvKvBK7Z2nhn9jv4uaj4Dvfilr+gT6XoNjp5vLi+vIiuIixWFVH8TysMIo5x83Su+lk+IdGVSaskrhHGUFVVJO8n0POLvR9Q0+CC4vdPmt47qMyWzzIV81ASNwz1GQRmvFq0pws2tGbpxlJpPYzNQiyv8/asS2tDEvYSmSBj3rSmr6mUlqVobf7Rdxw93kA+vNaTvojaFrn2F/wTLsLiz/ae8P6FI02nym8Aeyugsq3CLhmA4+TGAQTjPY9a/R+CYSWKknf4e2jLxShPDVYvSyOT/bs1OLxF+1t428QGaKZU1hoVtLpQYud6qWBHK5xn1xX9I4mj/wjUodomuLlGjgqUI62ijK/YW+Kn9na1e+EfEFtp935eom2v9XZjFuKjKR2zHPmoBtwh27e2a8bAxUVfqeAsZXr4lRi32d+x6Z/wUM8JS+NvhLa/FfSrIxxWRjunwg/5YNtk/ONia9eg44ihUwtXVSTv8z6fC02sPKm3qtT5CvYEU5gbcpGQexHUV/H+fZdPLMzq0JL4W7ehwVIcruVmH7sj9a8ezkJNNmZrEMcaiTZzxzjrWtOLTuYYiSiZVvC08md3GeB6V1cySsctO83dFi6tgoGT09a572d2XUiyWygLJmsp1GbUWnEZLGUfAB96werLcR6oSv1oaaRvTkmQy5VuRinF9DOa94IEO4ArVPbQ1ptFyFf3TAkcisJbnRJe6ZpXFyTjjNdFNaWPO57uxu+HTvmHanONkbU9DI+JLbJhn8a3oQ0ODGSXMkUrUh9BkAYH5Dj8qyqRfPYcbyw70Pl74syGD4gsgXPzdq+6yyCWDWp8ViaUpYrU63wDC0kSM/tjNeZjaiTsj28PCMIHpWgrt6DkjgV5L21OqKcpaHUaREVlB7nsa4q1SPModzsh7qse6fszeCPFnxJ+JXh7whpunQSafdXLk3N3biSO1kjUOzc8AlAeDxg19xwLktXNsyhBr3E02ell9H63X5LXS3PoDVvG1lfeK/E3xMu1tpNF8GQJa+H7SPG2S5GVRD+ILkfjX9Z42Ussy+FCNnGST6Nq11buuunVWfY9zO8YsPT9lHoj4B/aj+K3jD4g61eappM2pSrBI5vZ4xta8c53hZGZQq9s8nA4r85x2LjXnJvZH5ZmGNnJqV1ZPW/U+Sbmyjk8RyXMuhNaSSvkp9sMxOT1LZNfP4CKq4nmSObB0fbVue1j2j9mPRzdfE3TrFohtJzKGXcGH07/Svr8wrxo5ZU923LHfvv+P8AwD6ulKUI+R9ReK5fCr2byXPh7U4HLHN7JcRNI5HQBHXKr9K/mjH4qjWqS5oNa73N+RuSaaPLPEcFxHme2zjPfrivFpwUnuYVVJ7Ffw7qW+5IkIXnvXTL3Y2RNGShK7OlhIeTzAQRmuV3PQjKMmXbxv3YGew6Vzyb5hyK0QBxxznrVwiyHHS4XHCbc9q0k7IiT0KzLlcZAx61ClYUdHchcY6n6UORFTVkEybhnb0qoy0CDKt4hC4zz2NVF6mVValeGMj/AD0reUlykxegyRfmOevfFZpuzId7nrn7I+lGXVtY1YpwohhU/iWP8hX9CeBWEdsbin3hFfi3+h/Fn0scz/fZbgk9o1Jv5tRX5M+nvD0QCque1f0NA/hfGSu2ei/DqJ/7TVkYAjGDiu6lb2bPmsU25xt3PSJpsRMkg3cg5HQGsUlzHdiK6hSkpq7PO/F0cstxLMzgjJ4rrs3G7PHwctFc868RRtuYgVx1j63BPY4LxJHjcMg5ry62iPq8E9jzjxarh2igTc+OT2Hua8LE80nofZZe00nJ6HmPizSSGeVjvc/xV42IjpaJ9zl+IvZLRHkHxC0nekhK/WvmsfR91n6Tk2JcWjxTxrpx+zzIF5U5FfEY6hzwaP1vJcV7HEU6iOY8PyYuFXb/ABenSvnZwXsz9RqWnZo9a8IAm3X0IFfM4pu7R7OCtynQxpzzXnT0R6Em1qhHQlME9KwvqZ25iq2RnjAq2vdubRVkUbrkNzQ3octfWLPSfh38MNV8ba9Z+GtJmt4Gu5/Igur1ikLSn7se7GNx7CppxniZJodWpGjpLc9m0r/gnV8bvEmkWGqeFLVbs3dnc+dbFCsttfQZL2bj+F2UZQnhq9yllVSUdGcNHHL2tpKx5v8AED4OeMPhbrcuia9YNJGLaK5gvIYyY5oJR8jgkccgqQeQysp5FeZi8JVoS1Wh6vNCaumc+tsV+Ug++a4ZRaHFWY6NTEwyKye1i1oxzEn5QetOMbnQo2V2OQqOv41t8KJjK2rMzWL5pnFvCeSaIrqznnJ1Z2L2k2gt4Azr8xpfEzqiuWNkXWYsnJqZys7IiWjJbVgI/p1rB67myvbUbCrfaDxxmtI/CD6GlbYAOfSsZp3LjJJWIZRmTJ/ECiKszaKdh6RF0Yf7JrYLq582/tD2wi8Qq7f36+vyT+BY+DzqV8RZHV/BmRJNNUIvIXnNcWaRUajbPVyqlPkTPUbRCYgD2WvAqyi9j3ZxaSLFspJwc47Gs76EQdmWdRt
1MYPbHWqg22ays0T6QuYwpqZp3Jg2noTX0SlNg/Os4t3Nt2MsLQRuWHQniqlHmWpE1yyujYi5ADd+2aTfKrIqErMV7QOCefzrHmbZ0WcmPh05B0P1JquVvczlBpj3soxycc1KTuUoXiRiBQ3y8U5RVjNXixtzACASKzjudF7K41FWNQCOT7Vra5hbmZLbwkvuxx3qKjSZfKkz2v8AYP8AhNf/ABJ+O+nwWtgbgBm8iLy9weXhUQ54OWYV62W4P6xVSseXip+zi5PY/fL9nb9jD4OfAf4f2Kav4esZ9Qh0EWN/eXMahfLPzOv0JJz6195ChCjBU4K7SsfD4nMq9Wo0nZX0Nrx14B+Anx38Ox/DK5u7P7DbEOlpaIqLwuwbeMBgp2gjle2Dgjo+r81LlnHQinjcRhavtE7yPy1/4KcfADTbPUrbWtM0yDT7ttbbR9D0iFX8xLOGMLb28EAGWZ2LMW6cepr4viGjGKv1vaK8j6/Jca5vls7NXb835nxt8W/hX4k+Emvnwr4y+zQ6msKyXVhFcrJJaEjISUKTsf1U8jvivkJxlTnyy3PpqVRVYc0djg79QY+aum/esNpWKIEZcEgcMM56VrUvZWFHm5j7O/4JoeO/EfhT45+G4tejmuLS7uY44EutNh+ReBlH5kUDOeymv0rgmrUWLcJyesXbsa4nDVMRRmm7aHk/7Z7tD+0Z8Q7m9gY51S5WMY5dfNYKw9SOeK/qWtBLK6Epx0cV8+n56G+Jw7hRpc38qPOP2fvilFofxBl07xLolvquosyLDqdzdGOLTYlwI/KiGFaTGcE85Jr42rWVHEqMHqeHLkp1NXZn6R/Drw58O/jb8AfEHgyFZ2hEDyWw1RcvnaQ6ZKjdkHqABV4epXo4uEqjvfRmkswrxxUXT+F/kfmR4o0OfwhqV14V1D/W6LfPYTEA8qp/dOfQMmB9RX5/4q8MRqwWZ4dbaS/Q9eUVOnoZsigKzetfgSjynPHS9zF8Qyu0Y2np6VVOT5jkxEHLUo6JDJy7Grmww9o6Fq7XBOfxrKUi6tiSzXEfGQKxmrk03YSaM5Pt0pKOtzXm0EjZc4A71UloXCLvchu1QvyOlYRTuObsxsQZmCp0HWttIajpx6l5ExET7dawbuzeU/dsjKkfbcsq+tdULqOpw8t5XN/w0u2QH86iUm2a82lkYPxSl3TKievIr0cOrQOGvTvK7M7Szs0sxtxlfWuStL3zeEkoWR4L8YfBGpx+Lv7cihzCTyfSvpsDjYyw3Ij5XHwqPEXWxseCU2RKG644rGrCN/eNaHNM9K8OWvmBWYDpXl16ii+VHr00ox0Ox0PSri9u44LaFnJYDCjJNcsIOpUUVuy23sj7n/Z48Pa/+y/+yj4v+Mnie0+z3HiEi18MWksYDHCYe4TuAQce9f0/4W8PTy7D+1rKzer/AER9dlGE+rXqS3Suzyn4462fhP8As56B4Lu7xrXUtaaXWNXQW/mybphgDaeM7OhJ4zX02b5hWjNuk9XdfJ6P8D57PMQ5zcoPf9T85fjrqq6rqdxLqLDUEUnbDqmsGBVHYpFGRz7V8BjJU+X3rN+p+eV+RxcJfEeb+FrYNMCkAjySdoYkL+fOK78mw8IrmasexltKpThqfRP7JunrB4uTW5nkKWdoXZ4s5LHgdBXbxTjaeGyKbTvdaeZ7MU5KzPV9bu/t11JeSyvvZiSGV+R7lySa/mXFVlVm5JWudsNFyoxLwCYEFfwrmhdMtpQMG90ya3b7VBxg5wK6ozhf3zkrUXKN4l/w94jJPlT8HOCDUzh2MaVWUHqb73fnxB1IIIrnlA9CFRVBYQQmSOtOOhq+wlwpYD8qcmkibJakfl7F5rJXbIlJFeVG3dO/FaOJnNNkQGQRjqamz3JUbPUrX6ELtHbrThuTKxWjjIGDXRYzaSZE4xJgUnZIhu1z3v8AZE00x+E7m+K/8fF+xB9lAH9a/qjwUwvsuEp1rfHUk/kkkf53fSfx/wBY4+VFP+HRgvm25fqfQeix42Kf5V+wxR/KOKe56H8PVP28DfxgcEda7qHwM+bxTvOOnU9Au5Jfsx3BVAGNmazUVzpp/wDBOnGSlGg3JW8jgfEjBZZSG9eK6G2ebhrtI8/8QpuLHHeuWofU4N7HCeJoQQ20Zry66ufV4KW1zzvxJpxjd3inJ3feU15NWn0R9hgq3Mkmjz3xRal967cY6GvJxEEj6/A1LWZ5V4408Or/AC889q+exkbxPv8AK6zTR4h4z04rcvGy9TXxmLp8tQ/V8sxDdJHB2VkLPWXt8fdkyM+lfIY6Eqc2j9ayjFPFYOEn6Hqvg1f9HTjOQK+RxWsj7DBrQ6GMcEYzzmvNqbHoTGyghC3fFYRV5ELSRSkJCn5eD3rpkrI6GUX+fOBWUkcVZaM/Vr/gmX+zD4H+KllJ4NvILfXNKZgZtMvdJkjktnyfnEpB+YZx1Ar7DKcFh4LXWP6nk5pKpzvpY/Sr4f8A7IfgT4ZQBhqKoxaPzGuZdzMqfcJJ+8y9ATzivedOkp2ijyniOaOpxH7TX/BPf4afEbwxe3mnaTaLHNaXKF4otwMcxDNgDpiQCQD1B9a4sXhoV3ZoFmVWLSvoj8MPjP8ACrxB8IviNr3gTXrIxzaNq0loxYdQMlT+K818Ri8JKhVknsj6zB1Pb01JHHOh278cD1rzpRSlZHoqnFiKuAQ3fpxVaRRu1aNipqOoJAvlxdT2oh77u9jhqOV7Ii0jTpLqYXE46daJytojWnBR1ZtFFBAToOuBSbtEpuzuDKdnHTNYLcEnNktoNoK9qJKViuZbCxL++JPTvVxTsU3cuxsfuoBjsaGkty4xuBj+bJ45rJu70NXJRViS3AyxI4K1d2kZyd3ofNP7SbyP4lWBB0l9fevtMl5YYdyZ8bmkUq3MzsfgtZiLS43xztrxMzrOrWaR7WAmo0UemQ7vLGB26V4/LZanqRfMixaR7+c4FWkmjN6SJ7su8QjBzx0IrWKUVdlKDauyxpqCKPk4z19qxqSc3oP4SWQF5PkGR9KIxUVdlwTvdk0CYOc1Dn2Lm0y5bglh/OspMzjuW/mY/wBKUY31OuD0HrgDIz15rQibaYkgyDg9cUrInmZXeN2YE5H0pSWhWhL5Rxhhz71nGOoNuSGx2hZ9xBwP1qpy5VoWlZGhYaZLqF1HZwIS8rhQoGazhFzlYirJKJ+pn/BCn9jbxhbfFVfjT4stFXQrDRUubGFk+9dSlghPHUIm/wDFa+7yPCOgnVfbT5nzGd4qEMJyLdv8j9Av2lPGMlxFPYPPImnWR2SJE+DPLj7v0FfS0JqGjR8lCDi7tHzN4Z+OGm6L8XLXwkZJprrzFkAadY7eEZyFJYfMfbFdyn7urdjrp0PbPsdX+2h4l+G9jYHxx4hNro988BEmreHXtxqCoyfNturkhbUEcFogZDnjHJHzOaV6CjJN9Pn8j28JTrwlThTg5puz2tHRu71V100u7taWu1+PXx18R+ANa8aXh+HGhWtnYCVsNBdS3MkzZ5eWeU7pXJ5LcCvzrFRoOpenGyPt6blCkoyd2j
ze7cliG6nrWcYqOoOPcqIBvOG4zxVVLtWIcuV6H0j+wV4zTw58c/CpuPDwnshfKLq7jtkj8vJHLuzBnHsM8npX2PB1Z0syhzaK251QlVq0ZKL1sWv+CnXgu18IftKeNkWJ0jvZRdWeeMkgSKw9iAw+or+tsJF4rIKFR32/I6K9aVbLaU+trHzd8Oohrt8PFHgC4inkivVmt9GuGRbcyYxJMzu4CtwACQcDkYxXxuKhOOK5ovqfG4lS9u5tf5n6s/sP+I/FPi3wRaaf43u9Iv4mO2NbG8huJLbK9GkR8tjpzmuWtO1S8ZbGns4Qj7SDafmfBf7ffgVfh/8AtW6vobyra22uWzI8phVyJEyUYB8DPbPUZ4r6NUaeYYFQqrmjJWaPp43rYaM1+B4xHvk0yC8OCJVIJBJ+YHB6gfyr+XOLshrZDmMotfu5axf6HPUkpXsZur2/nRcDpXydPcxlqippUZRipXjNdErJGdODbO++BH7M/wAYP2qvH7fDT4J+GU1TWFsZbs28l3HAPLjGW+aRguegAzySBV4DA18wrSjS+zuPEOnQp883Zdepx13pWs6Bqd34f1/S5rK+sLl7e9tLhCrwyoxVkYHoQQRSxeGqYStKlVVpIdNU7XTuQSjf36HtXLdJG65Yka4ViFPJqG3IpzSN3wp8IPiJ8R9B8S+KvBnhyS9sfCGlJqXiGeNgPstq0qxCQgnJG5gOOcZPauzC4CviaVSpT2huZ+0purGDestl3MnQdB1rX9VtNA8P6Tc39/fTLDZ2VnCZZp5GOFRFUEsSegHNcHJUrVFCK1YOuqdNzlokX9a8N6/4W1S98N+KdEutO1HT53gvrG9gaKWCVThkdGGVYHqDVyozo1OSaszSM4zgpJ6M5qG1vNQ1hLHT7V5pp5AkMUabmdieAAOpraFGrWmqVKLlJ9FuZq50WjQS28pjlQq6nDKeoI6isuVxk4yVmtzelFHNfEVlNyAWHB5J7V2UeeVlY8/HX51E9Ak/Znfwj+y3eftF/Fb4hW/hqfUokk+H3hCTT3mv/EUAlVJrxgCPstqoLbJWB8xlIUY5r6OHCuPxOAqYpRdoq5nTo4uu5vDwcqcF78tkvLzPnD4iXgu9MLEAjOQe1eVl8HBnj1pKqjK8D2LSyBmGMHiuzGVUlZGuHiken+HrKSUqscZOOuB2ryPZuctTu62R92/8E7v+CfsPxj1RPi78Q7WeHwhpF3Fc2NyxaGS+kC/NFjOCmTye/QV+ycE8I0qVsbi43k/gi/zPbwGDjSn7WprJ/Cv1Z6P+2p4otfjZ+0H4Y+AXh63jj0azuUja3hfEUEURDP0H90Yx71+6RVLLspkpxfNNaWdrO63VtVa6tprZ30s/Yx1V4PCcl9Xqz48/4KFeINP8XeL9Su4/D97PBBH5EOy/FrGkSDaFMjbcKAB0JzX53jMVzVGr7H51meLmo3g7n5yeP5NFuddNpYadpkbh+tncvcOf96Rjgn6V85Upwr4hLQ+XoWr4pXLXhGxuJb/ylO1SuGdu3rX1+W03TjqfWUozS0Wh9Rfs86DqGjeD7nXrQiKWd/KjZSM7B9cV8X4iZhNYeNClKzPRoWlLU6e9e7lXN3cF3J6EV+FzlJy953Oumlcy7hwueMAU20KsrakUW2ViNoINTN3WgUpXVjP1bRDG32iz4I5OKqliLe7PYyxGGT96O47RNfZD9nuOCOMGt5WkrxRw05ypyszorSeOWPcrcEflWEkerGopx0H+ZtJLD6UJ6ag4NvUxb7xhpdte/ZGmUMTggkVpGlOesUc061KE+W+poQz293biWNsgjIrOamnY6HONiB+pAwBSs0jlk22Vrghvx9aSdiLNlc4Ude9bxkmhNOLISBk80pbE1E+U+nv2ZNJGn/DrTxjBl3yEEerH/Cv7R8NMH9S4IwkGtXHm/wDAm2f5Z+OmZrMvEbMKkdUp8q/7dSj+h7No8QyBnmvvIrQ/B8TLQ9A+Hsb/AG0Mq7iAMDFddFrkZ8/Xb9rGy1udxfW+ozQs0RBVVzIfQelKDgpasvGUcVVg5fZW5wfiBQDJ6k1tO3Q5cNrY4PXwxLdiK5Knc+nwmhxHiFclsn8RXnVtWfT4NnCeI4Q7MX/AivNrNH1WDnZJI4DxLbo+8Bfzrx8Qrn1uCm1Y8y8Z2Pyvxxzwa8DFRufc5bVV0eK/EDSzvaUAcE84r5TMKWtz9PybEe7Y811S0MOsRXQHEnB+or4/OYWpqaP1XhfF/vHQfqj0XwbzZoM84A4r8+rzUps/UsGrx1OjOMZC8964JvWx2z3EkT9znFZx1kCWpm3GApH610z1N+5QiJEuGHU1nPY4K8tWj+in/gkzpCxfC3UdXt/CMWn3UFuXjWG584NgZzyeK/R8v5JYazseBnnOq1zp9V+Ndz4w1TULB7q4kmtGxeRKdghBJAyeOTjitqbine55dOMpU7vY9E+GnxA1LRbaKzvLn7bpVxH/ABndtzxzVyipEuKWp8pf8FV/+Cad18X9D1L46/CHTBc3981tPqFvCMtviDqW/FG6+wr5vOcM61G0Vqe3luZOnUjCS0PyU8VfCbxt4YTzNX8PXECO8wUvGePKcK+fTBI6+tfHfV60ZXa2PpvrEHLc5W5geOMrjNYOTlLQ6lO8ShDpfn3Pny/d/lV875bIhQ+0akaJEmyNQMelOPu6shzuxVU8nPNZTd2NXY4KTwBweaUVdmyaiiWCMhTnNaNpoyejBSFk9T6U0rIcE2y/ZWV1eSxWdnbySyyuFjijUszseAABySfSueo25G0p8ur0R6D+z9+zzfftCa5rPgzSPFtrpev2mmyy6DpuoRNjVbuMgvZhh/qZCm4qWGCyheCRXTg8N9Zm4t2fmcWIxUqMo2V0932OQ8O+EvFHiPW5PCWk6FcPq0azCXTmTbKjRIzuhDY+YBG+XqSMAZpOjU9q6dtUdkeSUOa+h8t/H1A/iFbiQH/Xf1r6LAVL0eVHymcyhGrY+rf2G/2RfA37Snw213Wvht8Z5Br3grw7JqHjLwjqeg+XejEhUS2IWVhd26AqZWPlyJnIRgRRLAUasZVJyafZK/p127mVDMnQnySjfsaVl8FPjB8Pfi5pGgt4HS+vYrc69p2UEtnqdhbxtctOjHiSLy4XJB5+VlIDAivGWHrfWVCKvbX5I+khVhVpSjs7foexftV/sa+L9b/ai8XRfAbwBa6f4Zl0O08XIkl/DBZ6RYXsMcyxNK7bEAklMaqTk4AA5Fd1TK6zry5FpucGCx9NUUpu7vb1PmnULe40q9k03VIDDcwNtmibqp9K8iesmj2FUTjdHqv7Nn7JXxW/abtvF2qeAP7PttL8C+FrjXvEmsatcGK3treJGYR7gDmV9pCrjnB6AV6GByyti4ynHRI4MVjqWHqRjLeR5xYyJcIpQY3AH868qoveseiproeo/BT9lb4j/G/4c/EL4teHLiws/D/w30VL/W9R1KYxRyyu4WO0ibGGnYbiF44X3GeqjgK1WhOstIxPOxeZUsNiYUXq5duh57AwJznj61xct1c9KGrLKsGbH5cUm+VHU
GzVBgmeTad0szHozcJ1APSvs8t4jw+HwSpVIt9Glp8zxsyy2tjJctKSWqd2r6X1W63Wz6PWz2PRYf2+f2iv2rfjDc+MPir4zmDalEIbHQ9ODJa2EOdwjjUdeQCzk5P5CtKXENacpU6XuwkrPz1v8AojOplWGw1PRXaPDf+Cmv7RHx1/YI/bkt/i38PviX400bQvH/AMMNPuby18IeIH04X7xDyJEd8NhVkjc/L8wL5BGa9vhfE5Vl2bOpj6LrU5K/LdKzto9n/XU/O+I8vxmPotYWpyVF18j5L/an/wCCqPxH/bB+GekfANvh54a8IeEdN1UXlzDotqz32qXZODd3t25M13MQTl3bJzX0+aZ1hatKdLBwlTjN63ley7JWskeXleRzy+ccRiZ89RK17WPvT/ggl8ZvFnwC+NeheA7bXpda0Pxtpch1bRwCxt0jA2zNnjkE89sGvmsLVpxlyNn0NZTqQU1c/VS48W/C3Vr258R+F/FMM1ispD20tzHtBz93cCRkfUGuyUqMp2TuXT51T95nSfDeTSvFUqT+FPCd/foZPklsbfzFjb2lA2r/AMCI+tONSMHbYuU24e8e+/DGD4i2oeHxRpC2tkEAhNzqnn3LH3VV2qP+BsfYVhWfNK6POunJnZgnbg+tckr81yW9Bk2cEg9KE9TemnymbrV7JaabLJDaSTSbSFSIHP1rtoxUnuKpK2qPkb9rLxAdPQ6Rq9pdfZb+2kjlmnf5FkJOAMdM9M13qMqa5uhtSjHEx5NdVZ9P60Pj/wCI2qpo3xe0XxNZQGKS5so7YTDOSQwBUEetcqqU4zUup60MLFUVGP8AVj6osNU+EN14Pk0nx5rutx30triOTQdbkhlhBA7oVG7npXTVjKrT0RxVabcbRR8Z/G79jOy1TWtW8Q/D39onXr13jLpa6yfMc98FjzkY9ea86eApVE3ezKpVKtKGq0PhH9q/9kbx1498Iarp82uKupxWs0cYkY+VOc8bjj5TwOvTnn189YKNOpz32/E6aqeIw8lHqfmX4p8LeI9A8Qz+DvEtpJa32nO0LwXJ27CCeOeMHJIPQ5969enCM4pSdtNLngRoypXbVu5rfDv4KfED4jazFpOgaKBG74m1G7mSG0t1H3nkmchEUDkkmp+p4uvNLlsu/T79h1KtL4U9T9FPhhoHh7wL8JdG8L+FNd/tGwjgH2TUdjIt4qqsZmQMAQjsjugIztcV8TxhiI18fFX0ilFfI/UuFqKw2Ux0s5av1Yl/K0j4c89+a+QSij26tRvQy7oljjp6H1rKKtIIx0MyZwkxJHfpmup3auYTvzCOz+WeKxcuZlVIpoTRlP2gg92rqbfIZRtB6nUWEgEY57AE1xzvc3jK6NCJiRjPPas3oW3oPVyGxmpbuRdiqcPkimk2hpXdyQZYgH0p8tmbQQly4jXjrTvYqrflKsd0WJGO/NF7Ixin1JbacrNnbgetLmNYcqZYvZg8eCegqGyqj0MtsmQknjuKpPQ55WSuOhn2yhBzn3raKtuTFam74dPnXiRZyCRklsBfqfSunDR56ljdNH6j/wDBEvwcLiz8faHfy2GoaPq2jol7bJlkYEMrI+Rg5UkfjX7hlmB/s/hehUe7ndfceFxU1Ty6hOW/M7H5af8ABQf/AIN4fjD8OPjZrXjL9lCDw/4w8BazqTy6fHca9FZz6H5jMTDN5jqNqHgHnIA4rzcywmMq4luCaXkfIYqjD2jnOmry6NtfPdFL9kn/AIJkfszfs7eLLPxd+094psfiR4whuP8AQfAXhtXl0uyl/hlu5Tg3QBwfKTCHu56V7uSZJTdeH1m9m+ivZd91d+V0cdDDYly5eX8f1P1T/Zq8H+PvGVxZfEj4tahDNd6fZRx6JpPkqltYLJxHHHEAFQDHQAACv0ChgqeBpWS3Pq8Nh1CKuj6a+A3g6x8Ra9qeqxWsIh0u5lncLkbpBkJnBHcuce9eVnmMlhqcIX1nZfIWYVJRhFPeTsdN4LvW8WyzXGp3bQ/ZpWChCFAGSNzsMMzemTgVyYyLwkUoK/Ml5/dfRG9an9TheK5r9/06HqvhvUZ7Twrc2l5qRuPsgQmdwRlSAevevksRRhPGRnCNua+h8fjOWrmEHGHLzX0Iku4bvfPaMELSjIb/APXW7pyhZSN1RdNpT6Ict/LBcrJHIWUn5wjYVTnqR26YrN0YyjZqwSipQaSKvj2w1fxbph8G+Hpxm6RlvZJ2wscLggnodx7AfnTwMqOEn7esttrd0c9H91L2ktLbep8E/to/sl/Fbxf4Q1p9M8OarqOm6WxWxuWhBMsfSUIM5I3DepAP5HFfUYjHYXGYdQjP3rbLX7z18NmVOnJQufkD8ct/g74m23iDVLfy7m4t2s9SWRSD9otzgEgjI3JtP4V41KcqTXNue5WmpRUonKRnUPi5eT6xqTh7okyW6sSQgjUYAz046V7GHrqe7OKblUuZnxJ0+38T+HhplrKGaytGk8rHzqWbC4PoCrcf7XtXXOpTVJrqY/V5RSk9jwrxl8DdO8T3E00qmO+itmkDRL/rVAXDY79efqK+XxuBp46eukjzquXxxF57M9a/Y2+F9h8O/A93rV7p8rarqEuwXssx2iAEHy0j6Lk4JPU8V+X8W0auGqQoN3W57vDeXU8IpVHrJ6XPX0YEfMc+lfFTlpY+pbsJMCcDIrKKM95CfwcgjiqlK2hve0SmYS0oXd36k1N9DO2ty9axKnUAHtxTUW9RqRYbCkD8sUSbYPcbNHlASozipiaRtYjiQKj5PatU+xnKKbM7Typ1Nhu71vFysc0E4zsbF7yhGccCsKj1Om+hmyk7chaSlZCumjOmaSR2X26U02c/LdkbxgBSacdyrqJFcByhIU+xq+ZN2FKKmhukqxG16mXMiYyUXqWJypfBNEXyib94jQtGePwzW6lFoiUHJ3HyvlCen4UOKfUynBpHo37LH7RcvwF8Ufa/Iee28zeLSI4M0p4Ga9DLswnh5ctjzcTl8aurP18/Y4+MDfHzwLa6fqsUdtqE6hhbrcBjHnoDg1+k5VfE0eaT1Pj8c44Orax9VeNvjB8GP2OPg3AvxI+IGn6UwiPlLPOGmnmbJOyMZZzk8AA54pVqsXW12PKrVnzJPc+LPh1+2v43/bu+Nmv/AAG/Yt+G0D3Hh11/4Sfxj8Q9QNna6cW5CrZRZuJ5cHOw+WADlmHStqGbUZv3VeK/M5Pr8liVRglffXt/XY8xl/4KZeAP2bP22db/AGN/2gfGmk3Op6NNbxJ4w0Gylt9NuZZEVmgkikeQxMjErv3spx2r2MTyRhFzVuZXS8jtwGMp5hOSg7pO11+J9k+DPjBoHxBkml0XVY5FZsW6RSB8jtjB47V5nLTc2z26vLBK50smi/ELWG8u18N6neJKBmS3gxkfViBn3rKcUjirYik5XbPfPC1vNH4esrOexngaGBUZLjBYEDvtJFYxfKjjxDU9YsvNasGLBSaTscsKepJCpDD61Kepvay0K2safBqGny2FxcFA6H589Kp3vdFJtvRH5H/8Fffh7pvw++I1r8WPCl
1+8jP2bWVClN8Z6MfXB5FdNSjUlBTSFFOL8jzr9j+yEPjm3+KWheF9X8SvahWhgs7cykEdRjI5FTCmo+9Y1rTc4KLPXP2tf2s/jh8cdXHw+8PfB7XLKaCArb21/AIDJtXPCscnpXNVVZ35VoNUVGmpSPxb/wCClPxO/aT0PSta8G+M/D+p6NpuvX0EV1LgbJ7aL94IZCDkZl2tjvsHNLDVcRSpTh1l+R5mLhRrYum39m9vU+HHjJIwAR15ojFpanVGLs29D6c/Z28OP4f+EtlJNDsk1G4lvHyOdpwifomf+BV8TnOKVTG8q2irH1+QYeSwjqS+07/LY6i9cRKxAydvWvKi+edj3vZqKuR2jbYgT6VvNNzY5fxWWAh3DC9T1rWMUkKdpM0dOBjO8ilJq1iuRKNy40wdsbgPqayVkzKMrMu2TYAGecVjVZ2JJK7Lm4H5gOtc63BO6HRPtfaacpXWhKbiy7BHuHy/lURabszfnsi3bAh1LH8KrToKNRdD1n9nnxXLovimC0aDVJbaeVBNHpS7mlwQQr+iZAPUDiu/BShGet/kcuK9o4Ple59Y/t0/8E8vH3/BT79hez1j4OaIJPiN8MJ5rnR9BaWP7RqGmTqPtFmrfd81WCyIp4JyP4q+uovmipx3R8HmNJwxKlfc/Ij4Z/8ABKf9tL4k+NG8O+FvgR4ntoYpwuo6prWkPp1tYhW+YzT3G2KMDByS3GPpXowVWvG669TzK8Jxlyt3fbqfqv8AsR/8EytO1j4kR6dpXxmtJbfRNIhsde1Hwfdzb7kMo3wRzFQqJnI3IdzdRgGrnl8K/wC8hUV46OPV+e1vx6+tvQw2JcKCi4623P1I+H37K/wr+D/g+x8HeAPBum21taRKwMtkZXhbHLZbO5snknn1r1KNKlCKsrGTbi7t3Po7wdpS6F4QstOXYGW2UyGOMIGYjJOB05PSuCSUq0n5nJOVtEW0Zy+NvGaqSijNRtqSklRx1rJbsLMikdscjrSsrnQvdgVb+a4GjXFzbo0bLGxBK5PHoK6Icikrigudnwv+3FqXijw/eWUt3pGoXJuoJHmt5W81DH6lQMoR1BHTFejUqJq0NT0aEYxWmrPk34p+ILe18N2ms2d8ZJdOm8+0lYDJ56H3FeVP3WmdCrNvQx/An7RWjeKPF8OizXT3cjQYnZyR5LZySo6fjXXSx0ZaGkaEnSu9D0XxNI6wP4itZmdQgW6jQ8SwkY38dxW75ZNTRzOzXJI8c+Kml2ckk9y0gmBXbMGUESRPnax9xnFc9Rwd2jtpKPKkfEP7V/7JXg7xvrV7dXumGa4SNJbS7t3CTLGeCA3OcHswI57V5OKnNuy2Kq4eGJ33PAvhJ+wNp/i34p2Wl614n1D+yUuwbqzm08JJKgblN6uRyBjP6U8OlKOqPPjk/ta65paemp9o/FjQbPw3r0ekaVZRwWMFpHFZW8Y2pHGihQoHbAFfL8QUrVE4o/RcIlSoqMVokef6jIHbHIHvXyctzdtszZhjJzkd6TkiryM26iBkznvWiqaWHa4m4BCCeaizuQmJpLD7T1/i612aOkiLNysdFpznGMZ49K5KhtTi7GlEcKMfjWL1Ld72FiYBjnn6VXs76j5R/mK7BWz+VLVFqDZOrbDv29e1OzaKTsMlIlODz71i207BdtkJjVMHbz6+tWk2ElZCAkMcA+1aciRktJXCWdmTB4IFYvcubTKsrqOcdTVwV2Yy10CGF5JlVAS7HgVq3eVkKN2z1f4BfA+++Kfiq10+x1HR2lLASQXmqRxvnI42kgmvuOGuHpZliY6rzOmnCE7an66f8EzfhzY/AnxJqPw5bUba5mu9NW4uBAiYjIIG3Kjnr35r9w4gwFPD8OUI000oOx5XGdCM8lpVIprllY4f9s3/AIJy+DPGHi7WvFa3d7anUbsvItlqMkG6Nwcn5CB1wPx68YoyuWX5lhYwrx95K1/Q+fValmOCpylG7Wn3HjfwP/Y08A/B7xFcJpMciz6gZIZ5p2LtKyxtKwdsncFMYPoCv0r6iGDwOEoxlCCutu9/L5X+RvhoNRbgtFv6X/zse4eKvHGhfD9NQspbgGSwvrcJhQPkWHA246jdk+2TURTr8s3s0/zO2im2rLQ9q/ZMu5bX9lbVfiJqMAD6tNN5TZOZEBKKT+Oa+Mzuf1ziKlh4bRsebj6kaub0qUX8Ope+HmgDSNFttfv4Le2a6k+W1YHMhJ5dwOWOM9fWu/H1vbV5UoXduv6K+x3Yit9YqypRba/rY9rR7Sw8JSaxqNrHE00SmQLwG7AV8NapLGqlCTdnofEVISnjVTptuzOTj8WLfo32LZEAQCD2X0Fe88F7N+/qez9RlGS59TO8QeNVtpZGnmDWvlkYi4bpkn+VaQwyjBWVpA8PTjBJrX1L2gfE9G0rztLElzcTWzvFEU4wo6kgZ68fjXBXyxVJpydlfU86vQ9o+yR8ifHNvj3481HW/iP4v8XXY0jS32afZ2UrQpGwz8oGQM8dT0r6bC4TB4ZqlSWr+82p0qcNKcfVn5a/t+aQPjL8P9Y+MGliy/trR9SEuriBlLTwg7fOKp0IzgnuK8zMf39WUo9D1o1lTi1I8K+A98lpbRXUq74vMGXQbuvX8MVxxrclmgjeZw95qd/4c+I+t+G9aKyLbaxmJ8DD28udo64xz+de1GvTqLfodKilLVl46LZnWX8kq5tLopkDny5BnB/SuaE6UqrUXqtzT3eWyRu3PjPwv8O/E1l8N9ZlkiifTI5Vu4gSkM7E4jcAHHy7T9DXy+f8NUs4brc9mkdWCrNVfZpbnXOsUE/lwX0NzGwBjnt3yrD19vpX4zmeBqYDFOlJ3se+6dlqPYq4GK89Re4uVDJX2KQOmOtHI2Q5JMqQsWlBYj2q+SxSd1oaMWCBipbaGklqx0rAYBY8VNmwdmwkcGPPbH50+VoV2mRZDI3PaqgmmN6amZZBYtSLH1rf3rHNGalUNi6lzx3xWU1c2lFrUqTbVT69ay5WKNjOuwIiX6ematRZEnGBntdM7bRWzgkjmd5O5KjgpjHX1rPkdzeGqsNiJiY4HFXZJEVIq4zezyktmo6EKHM9CYYA9PqaqMWWm07CTHMRI/nWiTJqOysULRjBfLcZIKtnPpWkIxjJNmK5pxPpP9kH9sDxt8KfHum6TpXiFtP06SQfbJkjDSSDPTJ6fnX0mU53Vw2I5L+6eRjMpo1Yucldn63fAK3+Cfx/1qx8Y6vY2Os3kZjZ764IlkXBBxuOSv0Br7GNWGJTlHc+OzCCg7NWPwj/AOCwfiX9pP8A4JVf8Fevi5efBPxtqfh+18d6r/wlOj31jK0Zltr4F3CsDztkM0Z/3a2ybMXllaS5FJPRpq6PjsyyPD5z7lSTTV9U2nr6Hx34Y/aK+IvxK+Laap4gt7vxNrfibUkinWRy0s7yOBwepb0rozbN6mYYr2s1Z7WXY9rJsBhcgwaw9JaI/b/9ir4Cftoa3f6d8Rv2drpbKx0WxhtNY/tu+aSy1F0UBzgAkSZ43L6Csabk1zp6npVKrrR11Z+qX7PPi34w+
K9Ej07xjoV3p17boq3Esbo9uzd9hcbsfVaVWvGV4uNmck04yseqw/8ACR6SDLeSpcr3OQCPyUCuPnUupslzos2PiS3vGMckTIc45WtOS5jJSg7MstcRH5kJ/KlyFXuRpdm6VlktWVc4+fvV8tluNWTuj5Z/4KIfAPVvj74YvvA2k+A7aVLrTpB9umIHzAZGPevUw1SEaHK3c0jTdk5PRn5J/A74qeMP2c/EN94Cup7iy1DSbqS3ukMhB3KxAIwehGMVxQqKL5ex2OnTlG9juk+OOqa58QLXWNV1WYvMpVbl3JZW65BJ6+9dEZ0+phKStqtDO/a3+Dnw++NXgS+0rULe1u5L2wJuYbiEFpOp+91yOoNarkpx5zn9lTbtNan5Gal+xhqPh7xfrMF1Z38ukaZdIyyIgyIC3zF/YAgZFfK5hjpx5nTjsdWFwvtai9s7RPStlvbW6WlrGqRRIEijQcKqjAA9sCviZ2nJyl1PuocsIqMFZIo3WCD8tc0ny6o7FHmVmR2y4VQfWvRfxswn/FZeQcjjtUO5inaRctldY8gdaxlJHZzKS1IiszTAN2ppqxi48rujXtXMcYOKzcG3cv2mli/aHeMVhJJGlMlMYLZHpUlzehctiUXdjp2q0kRFczsWY9zEc49TmiUopaGiUVoevfssaf4N1nxX5Xie2guo0cCS0fxIdNG3I5dsHzE9QOa3wTjUqchy4mpGMGj9rv8AgmB4Y/s4tLYHTrWztrLda2eikyWrxNgB/NPLt6mvvsFho08I5S3v20sfHZtO1o23PKP21f8Agn34i8b/ALVd7dfCrwHpGnjxXOt3f67/AGULqaPu7xiQlEfcTyF4616FONarR5Yzso7o82MsLGPtZRXO9L9bH0Z+zd+yn4d/Zq8MR6HZDzRbIZr+8kyzzznux6sxJya3jL2Xw63MoytGzO8nuZpExbgtcXUqL5YGOWbFaKp7w1LRM9J3XUF95kxVbVbZY0G7ndnkkY9Md+3Suf3eW/Uys3JtFlVUAMvNY1Ndh8tlqDkk5CmojcIpJEGoahHYwGeVGbb0VFySa2hSU5aMipJxiVfD+s/2xBNHdqAYuX29MelOvBQVosdGNVwV9z5V/bY8ReBfiyup+Btf0i5u5orSQWsVocSKADhh8p7jPH5104ak0ve2OqVGtBp9D80PGEeq6HfXfg6+acwJGwtmmUbyuONw65FZ16atJI6qUuZXe5438DbXUdU8V6i1lfsdRtr13iduC5B+6fY15mEi1UbkejVm3FLufWfhjx7/AG94fjhu4kh8g+XeRMuChbhlPsTyK9j28eSxyRoSctTzTxBqotNRuPDmpTAm3maEsy/eiflT74NcbrqKsdsaLTPONe0eK/migu41Z42ktpj6gjIrgc3OdrHbGk+S9yn4I+G0Hh+Z9XulTfaxPJK7r/AuT/hW9Runbk26nRRp8vvGN8Qg/izw1a+IUJL5ODu7dq8DN6bqxuj2sPVVrM8p1RiHIcFSM5Br42cXfU7+WyujMupAo3HoO/rWagmRzNuxQlkDtyc+lNQS1No2GvGCmM1V9TCr7uw3SGAnPHOec10ST5LWFCa5jo7BQGBJ7VxyjY6YvU1IV4wO3Ws7MvUeI1yOfoa0Tdhc1hsQIm54Prik4iTk2Wzs2bS3albQttxZVEx8wjHWj2a3HGzFaVME559TS5dCZtpkDXcYYAHiq5LoiLTI2mZ8EED0qJUwk0V3ZjLs4xVpKESFrqafhmz07U9bis9TuHjhJy/lXEcb/gZCF/Wu7LKFCrXXtr8vlqzObT91bn3/AP8ABPP9n/RvEc0vj3wp4Xii060T/StZ1S2tnuEYd42jGPx5r+huFssweX041YwfvbX3Z62GpYbD0+acfee3mfZf7FWq2ev/ABt8YS2108g0/RxC0juCzkn7xwBgnFfVcZSlDKcOrbyPI4xk/wCzaMYr7Z71C2ifFHwYl/qjr9p03dFeR9SWHQnnv1/GviputlGN5YfDOzR8RiVUyzGSpQ+GVmj50+Ifibwh8MIb6C7FsGNrdSWEBUfMpCiR5D/fLyk++7619xSnPF8rbfS/y2X9fod9CCSSjonq/m9f+D5nxX8VPjZrnxH8c2+jeGZVl1DUblYrW2CA7neQqigHrx+p9q9GNRUqlqTXuK+traa9dP8APY9GFVYeOi27n6Q+K/EGlfA34NeFv2fNIt0u9Wh0aMvbMuVZkUFy3Hdtx/Cvi8nwNTH4+rmE3aF3qeHlWGqYzHzxU9I3sbPwttdS1Dxelpqls0tzLCslzJvAVM87UUnIUDGeO4680s2r06WCcoOyvp/wfM9rNHRw2XOpGVv66nefHK+1EaPYeEPD1q9xfX048q3h+9sXqx9ACRk8da+byL2NPESxNd2jFb+Z8nklShGtOvWdkuvmQeGPg3rsNolz4h1uNJiAWhgTKjrkEn610YnP6NSdqNN27s6cTnuHU+WlBtd2eefGfSJdJ1z7EsuSf4ANqOPx712YTFKtSTehNPEe2ipI88+EnjnUNT1KPQrZhFCfNtIpC+WiIkb5iD2xg/UivTl7OVJt9DqqUrRbZgfEDwH4v+N96nwV+F81tLcyyyebd3mXgsYhkGeQcEkk8DqSa0qYqjgqDrT6oxnOjRpOUtEz5c/aC/4ILftDfDPQ9W8XfDb416P45l1HSp4tT8K3GniwnuVZDuW3+dldh2VsE465rxKeb4WdOVotfijipZhhfhkmflr8PdD1jwpJceFNf0y7s7/TbqWzv7G5RoZYXjYqyurYKkY6V5sq99EevTk2uZGD8btDuotWj8R2dixH2UWty5bJKggxyn15yufeuvLsTFNqWltEdMIzlJF34dahZNc3PijV49ltbWayXC5++6jp/IV1VcUlJ8p2QlGC16HjV94r1rxFFe+PJ72SK8n16SczdPLBOFH0CgDHtXblkvbU5JmOFrNS55dz6B8Ea3da74Qs9YubyCZjHtkeJNuT74HNfjnGWWzo491ktGfRQxKqxujROpxxsSW/WvjYrTU6FJcpFNq6sNpP0NVZI56jW5GmpJG+4dD3zQ72KpVE2Tr4gVCDn61DSNJyVtBJdfUvnI6cc0WRjGrZh/b4MZXjNDtcc6lncauvqq4BHvTLc1KJVTV4Uut7HgmtFJtWTOOEmpltvEayYBI46c9KmSR3OacdSGfXosgM/P1pJXehy+01sipNq6ODkjGKcrDqaorC8h3fKe/enq0KmnbUet8nWld3BN8w+O9GMGnIueqD7SgfPHPelHYzpS1B7xW5J6e1XZFu1xG1FdhQHOR1pt21Iq8tiksyvLwevYU1PQxpt3NLTpRHOrNj7wyNxAP4ilBp1C60rQsj9Z/+CJGq3PiK7FlY31m0ESrvtrIthPdiepr9GybEUvY2PzvOaLk/mdt/wW8/4JNS/wDBRnSdN8VeD/G9h4S+I3gNXbQNf1CHMF7pso/f2cpweAcujYOCWH8Rx1YylTlTc4q78zyaEeXERnFtNPofDX7DP/BH3Sfh58VofCvhLR/Dt3qyT+X4j8XR3k+pXcEB4kW2
AiSC1ZhkDAZ8H73rhhKOIxE+aW39bHqYyGGp2cd33P3I+G/wd8M/CfwTo3gTwXpMVjZWsSxW9kkW4KB1Zs/xHkknua9Op7OPNGC0R5dFSi9WdpLrM2kOLWz095iMDCLisJJNXbNp2TbZbtdcubtzBc6LOmMZ4BrL2d0ncUXfVFXVta/st8/2RIR1ZhFnsalN81h1Yrl5rF/SdZh1O085YXTB6NGRV31M48ttBdVvDb2nnRoTh1z9M1tSV3qWos4T49eHNY8c+F10/R9dfT41HmPcR8McckCujCpQqakzp1JwtE/E3/gp/wDCO3+HXxhX4peDryS606+mEGrSeXteOcfddgCevTOearMYUKb5qbfmddJcsLPc8k0u9m8Q+FnvbG8b7VZkTRbc5OOo/KuClWg2rvQy5ZM6r/hcN7q/hFYbeZVleLa7ydQB1FOpX9ppc0pRk5angPxbvrS10LUZJC0U92whUrwHB6g+tePj6qo0XbqephqEatdJnhl0EtzsIxjjGK+MlGUndH1SstCnJcKxworN0W0wnOUVdEcRICnNd7+NhU/isvISFBHSok7GT3LltN8v1rmkjWMk9xGZllDbcHtWlO1tS525S1FM2MA/WiUlcwj8RftJGUda55anW5KMVYuwkuQCfes3YS95l2JRkYNPdlPQ3fDdp4WvLK7stXnvU1GXy10kxyxJbbt3z+ez8qMdCvfrWtKhTqaSlZkONW91sfUH7I/7Ni3HiKw8WeKPD/ggWsVwrF7jxi0kTgAgF4Y2O9v9npz7V9Bl+DjRlzXizx8ZVk9Eft3+xbpnimD4dJea7qel3Vt5McemtpNj5EUcQ/gUHnAGMZr62Muagle9z5XFTjKWt7ruexStDF/pUwXKKcORyB3pqPKjz2+aVjhfHfiPT4VXS7e5DLOfOm2+44H6D861pxbndo2Ssl2MbwDdpr/jeztxAzi2SW7nkOfkP3UXoR/Fkcg/LW072d2JRfLqeh+I9A/4SWwTT21O4tVW4ilaS1fa5COG259Gxg+oJFYNO1jF1LKyNHK7Aka4AGAKhU1Bag5Sm9BrkgZyAPUUla5cY23M7xPeQnw/OYW2kgKZCMYNXFuMtDaKitzkPhjr0t9a3+m2ciyzyNsVGHQ4wSfb3onGUtWVOolayKuufsnfDHxXBdzeMTd3V3eW7RSzifaI1bsg6DHbvXVHGVVFRSukZe1q8176H55/to/8EqPiJ8JNVvPiv8GdQfxVoKZkvrKLJvLNMHLFFP7xR6jkY6VdWrSqQu1ys66deM9JaHwh8LGudA+Ll9C4aINdkh9mCue5r5+E3HEtM9u/tKSklofTOraEbvSZdas7opd+RmYbCsdyuOhIwAe4r1pRXs+cilNX5Tyf4uSx/wBkxeKLJZJDDGsczk4Yg/3vdTx+VefUfVHZTgndNHO2F8l/Ob9fn3xI5GchiOhB9az9o4q6OynBN8rQnxE8TNF4K1TTtMY+dPYySXLDqq44FX7S+7N50lGm7dDivAV5Nq/wnbeN7QxgkAcj8K5MRHmpvQ6MEpSjdnmniXVLMyNKSFZWxIhOCD64r47EUrzPR9tyqxgXWr2JcqRgg9CaxWGbdkzkeJ12Kb6paM2B0HfNH1axrCuwOpWrJjt9aiVGz0KqYjmjawyyvoYZiycjNVyNLUxpz965qW3iIRjG3jHcVhOmmdixCsWU8WvnCtxUezSRTrOwN4tboG6U+VGXtJtjP+ErkzkPyaVoXNYTcdbiN4smIz5p+hofJsFSrNrQi/4Se5dvlaq9xIVOc1qNfxLcYP7w+4NS3EKs5yREviKYvkHPtVXikYxc2yUa/IBtz17+lJcrLcrsmh1fcdxbJxyM1M2tilN9D6B/Ym+DvxB+Mfi61svBOk2zQNcqLnUL3QlnWIA8hZJcKDj+6Ca/TOC8nxVaUZ2Shve1395ph4OrPm6Lc/VLxFFp/wAFPhXH4A8OWayNHbf6W0Vuu6VyOflH8q/cssoQqVlJv4T3qMFUqe3k7JbGz/wTnXzNH8a+Lk8zHmpaRm4tvKcEAkgjAPVuvpXHxrOM6uHoLrqfKcUt1alGl0bbPT/h/pN5e+NNV0S8vpILLV7ZrVVR8YkwcMPQ8H8xXmZvKEMsp1Iq8oO/yPLzrkjl8KkVeUD4F/bY8XX3gX4g658P/FHiXbfWFuQ9tdOsbGLzAd8IPLlvlGB27cGvcwmPw88LCcPtdlf/AIYdBUp0VUjrzItf8EvP2bdb1z4mn9qL4seH3ttP0qJW8P6fcRbWlkUsFlKnsA2Qe5OawzODqUuWnpKatfy7HVUoueH5V1Psn4u6FaeK9I174oTADUrC3jfTRIQA5DH5OeueOOOeM1OXTq4SdHB01eMr833DoTnhalLDUo3i73Jf+Cdni74gfESTUtd+Ii2S3MCMyxQSCWRA8rKgkkAwWCKMgcDOO1eTxpSoYSlCnBWb+77jyeLf3eFhCMZK767dz6O8W6/oHhG3ufEstskt2kSxYXG8jkqmew5J/OvhMPTniZqleyPiqFNztBv3dzwbxR+0L4tu9Va7OplISDttIH2qi++OSa+lpYDCYena12enCnSbSjEpf8LJ0b4qaS+napfo08g2xSbcbGGec9jXNKdGnUTpvTy8j0VQ5UmjxLTbjxH8P/GesaRJbwPPb3wnikQ7d8Dcsw/IduwHFe5g5U6sXd2HXcqi3Po/4C2fh/4C/Cq9+JvjiGO01zxEWvp4JXG9Yx/q4xxwACD9Wrwsyq/2hilCHwR/PqeVVmq01G+iPm/4p/tdX2vfEVdY/t6LzlZmhjW52C1jG7B478d69OMMJRwyppqzX9XJVGLPgX/gqLZfDX4q+LR+1D8N/s0GsySx2fju0tV2reORtg1AAdGJHlv6nYe5r52tShTblB6HtYKnKnHkex8jXunp4kuxBGWkmP7ry2X5XU9QR09Kqkk3dbnoqEtOx5v428SaO3jd/hH4Xljkt9J3Nq1zCcq8+D+7z3Cjr7/SuyFKp9o2Uoe1stkeX2sU138NdXktoiXtrwSBV7jeQa9rKKVm0+pzShOVGUo9z1X9m2/lu/DU9lNpkscg5znIH5Gvm+NsFCeCbS95HrZZzzpNM7W5V0bp3r8Nc7Ox6iT5bFSRnPGPrzU88SPZu5GiyHkuQal1VYPhYN5ykhRmp503qV8SGIk5b5mqnViloTycuo7bJnAP41DncG0KIpWGAx470e06Bq3oMEbmUJk9elaxm7aClHl1JzBKqZAPPak5NbgmmQeQ7tyfrQ6lkLlW4r2vHf2qed3KVmIISOn596fNKwm7Mb5ZV+px6Gi8mg2FIdPu/hS5n1E/eGpvP3mPNae0sTbkBxL0Gc9yaaqLqNXYyVJFTkke9CqJsTi5DbVGJyWziru3oQ0oo1NOjuLm4itre3eRncARr1Y+laKLT91mM2rH7Lf8EcPB3ibwP8Ppdb1Pwvb6bEbQyRPDHtZzjOWPevv8kpyjR94+SzTklK19T9CPEPgnRPjj8M4re+maGS7sdn2hOvI5B9q9KzhLyPnZpQehz/wh+Afgb9mrwzJ
ZaGql5XLzOBjzG9T6nn+ddtKd4csFZGEr1ZqU9+hsaD4yS+8VyT3w+WGBijHovsPWs69NpK2x0um2kmavhfxidc1qaO3tvkVsBvWsJ0pcmphWk4T5UdVHexOdrZU5xjFRCLirDumh809pGp85lwP7woauNRlIonXdInn+zWl9DuHVUYE1vChKKu0TUXs15lHxZrsWl2yRMoPmH7zcCle0jswlNyjzMwdZum1vwwdOtbP7QLklEXdgq2RgfSt6dua9xyThO1tDwL9oX/glh+zl8YPhzqdv8UvGWp6bfXkLD+1rW6EccDnJX92RhwD68/SsZznVuoxucl6ildPQ/Hz4l/Azxl+yh8XNR+E/i/VbfUUtm36ZrFg+bfUrQk7JkIz1AwR1BBFeXKE6U7M9CjarC55t4g1MaDrEhtFZ7SeTciqfunOf504qUtGXJK+h438f/GF3rHiy0sk+S2hQq2P43I5NeXm0lGml3PYy6yldbnE6pKk6LIT8xX5vqK+bjNt2PoVSsr9TPhUNxjvSrS5YtmUo8zsOQgxrXXo5suf8VluFyy9aiSRjU0ZYgbZ/9espxTWhVPUmRw7YI5pKFkXNSSHszq+FHUdcU4wjYiKRctJnZhk845qZwikbXWxp2b4xnr7VzSiaxi0i9DITwfzqEtbiuW4GhJUXETSLn5kR8Fh6ZqJ8zemoqlSSg9bH35/wTI/Yz1Dxhqdh8R9M8H6XptmZlKX/AIhvJ5+Qeih/LRWHbCsa+myzJ5xala19bs+cx2LhTW/MvI/bfwPosXgTwLp2j/uhKqxo/lgKpdiBx0/LrX2FODhaPY+YclWncx/jp4h1Hwz4Vi1SxQtGJik4H+0MA/nTUkqiv1MIxTqnka+KZ9cZxI+/a6KSueo7Z9OK6qkobXOuEbvVaHp3wM0yBrO98RopkkuWWBZ+cMiegI4GSemc0ndpXIrrkjY79CA+DIMgcLnms5SSOWMFucb8SPitB4eEmkaHeQi9Q4mkYbvK9gO5pRiqj97YjncpWgeX65+0J4lsphJYa9NK4OGhuMbTz7cVpCnTi7M7o4Rzje51WmfGBPG/g+4+0qhlBxP5bDCEdCfbjH41TjTjNpdPmW4NVY01Bu/XTT1/4Fzovgdpmm2Phe58cXKxxveSMBLngRISufxIJ/KsKtZS93ojKulTl7NHiH7VH7X93pNz/wAI94Q1GKHdKI1aSYIp5xuZj0FFHEwpyuEKbtqfJXjT/gpv4r+FXxNfTrTxRaXsqzBWFjdrJFKO4z0Ppiu6riY19+pUcNKo7rZHjn7Vkfwa+IPxY0/40/C7SodC1TWrT7R4j0e1GIJZQebiIdFJz8y9M815eJwtKFZTi9T28FCrGm4N6HH/ABX8Tap4h+COp+H9D8RvZ3YRGjkhB3ooP3hjrg9R6Gum9OeGfc7IUo06t7Hnngnxvd+JPCz6VrsiPcSRBbqNudzY5I968dVOh2wpylK7MLSrjVtDvJdKtJg1uJMxHPIFKVrHXLmTsi9KHvtH1SS4U77qykEf0C9ayi3J3N4xU9JGF8A7lv7Fk04qpEkZRgw4J5612KKdPUqjJRhY4b4neBtS/tGYf2eyAuTlUDD8D1r4zMajpTaS0NYQjVicLP4WmRgsrnjpnivKWKk9i1hIojbQYY+pxjoc0vb1GS6KQ5dEV8FW/EGolWqAqV3YI9F8hs9P60/aTkipUGtizFpm87T+BzWUp2Q4UWTLpAzhhWLrSZuqVmDaZEv3gDx60uebL5LCjTom4Cj8qPftcmw2fTU29BSUmXGBElqo4AGO5q7NomcbMdJZBhlgPbipUmtBxV0QLAsfIGDWlnJXJnFp2Q4QlmBH8qptQVjKzRseFfD2nazq0Vvq2vW2m2wYGS5ukZx/uqigl2PZR1r0MowUcfjI0pS5VfccoNrQ/WD/AIJffsxHRdPt/i34l0TXkt7aAf2Pc67cCES5Ucx2qHbEnoTlj3r+hspwdLLsJyRbbff9EehGdHB4R0qUm5S3XY9M/aE8Sva6hLKZre2mfO15Hznnge4r77JcBTnL2vL7zSV7a2XS59BRpP6rFdD2j9jPSLqH4JR3Woui3Gu6jNcSGNQAyqAo49OBXy3E8k84bW0EkfAcRVm8zbS0gkvvN29ubzwp4l0rVBEqNNrKtveXAKlgh+nHb1qZezxWDqQetodvmY+ypYnD1abbd1+hr/tOfBL4d+Ldd0/xvrfgXSr6+KeWl1d2quwYcrye1eFw3i5KMqLbstTyOHsTGFGdGf2XdHCuFt1S0itTGDLtdFTbGqgHr6JxX2cEnG99l/XzPoZTi1zI1PCWsWGpx3GkWBW5gCv9ql8vcHJB+Rc/dUflzXnV8M8K/aOTu3dXd7f10XQh0pX538jo/wBifwtp3h8eJrjTVXbNcxYZYwoP3+nr9a8DjTESr4qipfynh8Z121QhfozzT9qz9omH4X/EXxP4E8WX7WbTTR6hpkkowtzbm3jQhCTyVdGyB615mW06ccOqvr+Z8vQpylRUkrn5p/E3/gvP+yX8JPixP4E8faf4rlhjufLvNS07QWMEYzg8uVLgc8qD04zSnmlCMmmmVQqRhUtLQ+s/hH8a/BPjbwDo/wC0N8H/AB7aeIvA+uu32TULOTPkP3jkU4ZHHdWGQamjNYmLnDY9ZYmnUTUGdf4P8eeGfiH+0Z4O0PUpY2ivZ2jmII2ywpG0h3HqMbcY+tdsMQoUZRhvZjpSfsZN7o5b/goV+3Zo/wDwlcng7wlr1v5cW63jSUrtVcHc2eiqoBJY9OvavMoTWGg02r9dO/r/AF1R40KSTcpbH4tftF/8FatJh+JOp+E/hZJqWsadCxt21iJEVbxw3zNGD83l56E4JHsa4KmMlN2jsjqw2MwkpXcXpsan7K/xY8ZfGHw7411PxRZTw2T+HiiR3U2S7+ahQ4HHBAOK6MLSr1acpy2PXw+JVestCt8U/G6/CnwDc61plyE1S+BtNKBX/VOw+aXH+yMn64r1cso06k7z0SPUxElGnofP/wAEtPlsDLeXErNcXAkeSeQ8uTkkn1J/rXZOfMtDLK6M53T1RZ+Hdump+CfFWlSP9+0lJK9QQ2c124NuFSF3udlenGFKUEbv7K+oi01BrU6hdESDG1icfUiuXP6Cq0GjfJ5qneJ7NeQJ5hGeOvPFfzhjYexxMovuex8TKTxRg4PPHWuZR5h8mhGEG7cR19q0UEkZtXYpjGfu/Q1nKOpUYWG7Bndt47irUFYc4ocLcOen6UKKQlT0Jktk2kEDpzUTSTHGCiymtu5vcIOhrppWtqRNJuxrSaeNn3MHHNRVBU2V2sCp+79KiEU9zTSxG1iScbcetaOMUiIxs7iHTWHJUVLkrWG4pvUY9gQ33c0RloDimgNqqgll/wAah3bI5EiH7KQ+K2ilYUopjhAoPK1E4ohKxHcWylOB+NJLUuxXSAo3oK6VFJGFRo7T4N+AfFnj3xrZ6Z4UmEMvnrmduAgz1rswWHniK6SZ52Lqxp0/M/cr9i74ZeI/hL+zvPLr+tyXk7WOwSPKCMkY/Cv07AYb2N
JRPhMRWlXrt2PqXw5qF34P+H2hSw94EEqZ4INdFozk7owhD2l7nnvxI+L19cavdWV9LHbR2zlXaZ9oUfnVKrTp6dBPCy5jqPgR4X1LxBoNx4k1HS3htbzC2L3QIeaPqZdvVVP8OeSOehFc03KU99CHVvLlR6Ja6Xpfha2ee00UuqjJ+zjcx/Dqac5SlHluZNK/NuJ4f8YeGPErtHppYOrcrLHtINZuE6W5FKcajsjkP2jPilZfC/wqXsNPa5v7w+Xbxp6kHkn2rvy3CyxdbXZHNjsXKjFKL1Z84XHxi+J1nD5lncvak/NmAEc+/rXv1o4en5kYOnOpaUpXO5+F37Sc/wAQ9Pk8C/EGdY75B/ol2y43+mfevlsU7V/d2PpqbpQjdHbfBL4gWGpeI59BvNQB/s+Jmdz06gA/rThecHYxrp1HeJ80/wDBRT9uXRNFvr/wrYanGNP02N43kWXHmSlTwPxrSFSFL3UcU/aXtHQ/Hq5/aIuPi14h1KzOsi9g026eSNhJ5ggaTG6IP35AJA4zXm4jkTsd2Ea5bPfqVr+9iMHm323aiGRs9sCs4vodip2ep4h8W4HOn6Vq8nD3Ms0jfi3H6V4mbwcqKfmejlUoqs0zkmmLxYOfZq+fUVF3Z9JKp0IUDp1PPUVnUXtNEccpNXaCBS8a5Pbit5VOSbKnf2raLkTMo4PPfFRKpzLUmV27k8LA9ajncS6bsyUzIhCkHNNTkzWSbQoviHwAMU7uxnya6l6xc5GTw3vWUpvY3i4xNa0dCAAcHPFYNvqVKpctIxzx+NWmkrijZbnZfCie3tPEkM6WFw955q/Y7u3mi/0Vs8sYpFbzeOiit8LOPtkurOXGTtC6P1t/4Jj/ALMnirxN4y0jxr4z17V/EMcRWd7nxDrO54hgEBLVMLHg8DK96+3y/CVaaU3O6XQ+axlXD+zb6+h+mmrXcUGo2OnLcBC8wKpj7wAPFej7T37PqeNSp+65FTxsLVtKVb2382E3kIkTZuGC4ByPT37VpPlULs5oK9Uoaz8IfDuqXaS6fK+nK0u+6is0ULcDHQ5Bx+GKlXep0fWXGOp0sNlaadbJZ2qBI41woHatNWjllOdSQ6L7JLKZ4tjOPlLjkj29qycVcmTex8VfFfxV4y8I/EbWFvNOnvreO+kJNr8zgbjwRnNdUP4aMKUrM5/TPiZovxU1Cfw9oglsNWtYzKun3bxrNcooy2xN25sDrgdK58RGU17srfce7h8QuS80VvAfxkXwp42fRr2ZRaajC8MyycYfB2n8xXNSqONT3mdU3zJOB23xD/aw07wd+zv4f0fT7xY/N0oSSKrfMzFiQv8An1rnr10pJozeFUq7kfmL+3B+3R4Y+Fl7JfeNrxLjWNRBk0zw4sg3bTkCSQZyFrnq1JVJtpWb18kaTdKm+TdnyX4R+NcvxS8bD4pfGHx/pWj2VvgotzdRW8UEQ6KiZyT+GTWkMS6dNczOn2bS5paWPZvg18ZtG+N/xQh1fwjcPLoOnxG0sLmQMPtIJ+ZwD/D6etdeFc8RLnvpt5lUqsLe6dL4sme31i/8Nw3TAwSshCv93J6H2IolJRbgdtKPtXcxrHw+gu2vEkaJ/KCsQPvD1rn9mraHpQTR02m/DaPXLlLu5v4Y7cKGkkHDY71lW54o2jDnOf0TV7HxP4n1KTTlxYRhrazHqigjP4nJqKDctDGjJzrtGB8DreOHWbizOcR3LL+prvgmk0yqN3JpifGnw+1lrM1xFp8hU87hKyj/AAr5HOKDc7xO+g+XQ8svUMjYOcj1NfN8qg9Tv5o2sUZ7IypgjHpVqa6CUVJ6FMpNZNkdO4ptKWpjUi4K5ZtnW4A2EdPTpS8gpTTdidYXjPK8YrKpFHRy2Jgp24PfpkVz9Q1IZbdmyR6c4reNkS5SegQxFSCR75qpWK5UPmiDDaeK59mLmaehEbUghsVtGV0NJyGywkrgrwenFKyuS7xZCtvlssMe1aJ2WhEnzMmtbKa6nW3trd5JHYLHGi7mcnsAKzk25JLccoWjc+jPgb8NvC/wB8R6V4j+MPh2LWPFl00c2i+Cmi80QAnCy3m3Ji5wdmC2AcgZFff5BgHlU4Vq0OactYxWphGT5W0m30S7n63fs0Q/FOb4Dr4s+L8enQ3+oxl7PSdOtkjgs4v4VQADt7V+u4FVKlaEZJp9Tpko/W4UYpqS1k/0PnT9qDUtM07UZLu506I3kinyru4kwoGegr9byqmqdOMj7im5ypxp9D7f/Z4024t/h34UsGhQFPDiSyDP8TjOfevyPOqqniq1RvedvuPyTiGcYVq7v9tL7jl/HkF1e6ysDHdJHOPs6bfuMDktjB9P1zXuYPkjhm+jWp14eXLTUo7W1PVNXTTPin8PZNE1V2WSBUPnICDn+8tfIYf2mU5gqkFo76HzkIvLcxVWG0r6Hlfxj+C/jTWNOu/D/gPxJNaALGGZFyzArgnk4LdOtfTYLNqPs1OqrN317Hv4HHUnFTlvqZ2g+Ebz4feFdQtNQsCTFbGOa9kHDIByTj1OcjvxWtSvDF1afvXZ3+1dStFqW/Rdz1z4M6dH8NfhfF4h1O2itn1K6gLrGMBY2IVc49jn8a+RzibzXNHTp68qf4bnxGcVHmeaOnF3UE7fqc5+1v8As9+Dv2gdBE+saNbXslshWEyJ8yn1Vuo+orPKqiox9jVW+pxYTmpU+SW58FfGP9gXR76zufDN1oc11A6MHjvH+1Q454Mcu4Ee2K9yrhMPjEqfJdW306HU3GppJHzj+xD+z18Uf2QPjj8QP2Y9NQP8OviFoF1rOhW3lsV0vWLWMyMsaYBUSRhsY4+XHbn5+ph/qWIcIX5JfgcUcM6Ff2kb8vU818Bftw3Hgn4xw+KNZ1gN/YUV+i4JQndDJGny84PNeVHF/V67s7pN2dreml3+Z6KqwlTcY9T4v/bm/a08S+IPCt4LO/8AKuPFMklrYeRlStgrYmcZGcO2IgRwQsormr1qtSblJ6s8fGcuHoqhHT/LseA/BP4V3Gr3STz2zmRyGx5eeD25ruwWB9prIxwcJvXufZ/wA0N9Dx4BtUiDanFtRSh+eXGUQn1JGMete/OChhnCO59RgaSpPmaPEf2j/GB8Z+P5NBhJ8vR7doHiY/dnZvnBHYjGK58DVnToOJ2Vr1KvKhnw/s/slooMQCiFunsDXVCMbpHqYWDpQ8yH4GWkl5p3iKc7cNbT/j1r0HONKUX5mE5Oo5FH4Ca7df8ACTb9P1BhEHw9vcJg9eTkDn6U8e1XpOxGX80a59JXd5ZXjhn0+MExjEiE88da/D+IqWDoYqUfZ6vqfS25Xe5Tmso2OVPH8q+PUlFhKbYxdP2dBwD1zScwSuElmAcED60kky3TXLcja1L8LVJWMVoySG0CrlutNo6VqSR2/BBGPas3FMzqRsQWcCm/Kn15rogko7GNNe+bFxb7UB9ulI6eW6K5RCwyMVHMkYNqLGG3Gdw6VLldDu2I8A28j9KhNg2ypJGQ1WQr3I5FwCSPyq7qxVTa5GqhznH40cxjFsSRcNgCle4P4hjjI6dKt
RRVV2KsgLMdorW6juYKPc+g/wBhT4T6f408e2s2rvqEq+eoW1tt6I3P8TjgCveyWnCpNSPBzOo4Jn7aeD/Do8PfBu08PramGJxGptxLuOMjvX6FCUYpKSuvu/zPlXDmq3R7T4x0/wArwBaW6KSILVMBfYCppzXO7ijaMpD7v4PeDfGN9p3jK8so2cwRySwSxBo5TtB3Fe7fX8qzkouepn9YcYuJ0Oq+NtG0YixU72GF2pwBSjaTOKFGe6Lejava6ynm2wbg9xSqPSzLnRcCvc+F7eDXk1/T40icn/SEUACQev1qYylOHI2ZNLRo8Y/b++F/xH+JPwpEfwk8RJpetwMxt7l0Dc444717OVYhYecoy6nl42ip1IyfQ/Jb4j3v/Bf/AOBWty6j4ai8D+ONJt3LCw1DSvLkkQfw7lcc/jWeIlipTfLqjuozoUqd4Kx9DfAH9o/W/wBor9nNvjJ4v+Gs/gTxz4W1b+zvG3hZ5cizuQnmJJG38UUqfMp7cjqK48Q3TjeR14Wo2nd3Ob+AH7fkWoSeNprPWFaSPV/7PjRZMsFdAcj9PzrmwWJXNKT6HqUYU7pLdn5Ef8FS/wBvLx1+0B8bLz4E/CXX5Tpun3zR6rqFnId15dZxIoYchFOV4+8Qe2KyUqlSrd9zxsU71nCL0WnqWv2R/Bs/g3w//ZF9E0UZjywYdXHOW980Yujyr31qj2ssoKnB3O48U68968mmWxz5vyyEHotcifKmzaVSPPyo434824g0vQbRRjZGxIFeNmuIfsoxPUy2nZuTOFRcLgD8K+fm+ZnrNSepFIvGTUOTirIjlujQtdI/djB7VrJ3mdNaNqjRKmmOoxsHualpcoo07ssQaW7Hpik7WHKk09CddF3dV7daybd9BwV3YUaKAwOBn61Sk3obuknEtw6ZtA+X9aptGXsrE8doyDI/U0ly3BU3fQvW8L5C5znvU1Gka2ilqevfsxeCtC1nxna6pqmla9Pc28oNsmkTfZdwyCd07fKF45AOelerldGlN3ktTxcfNyTS2P2+/wCCWHgfw9pWnXetaJpOn2w+zAFodWN5cHOP9Y/TPHOO9fb0ORUrRR83mKmqCufWl+lo/ia08yyEkqo5WUsP3Yx1x79KFG8zzqU37Jq5X8Z21zf+GL+2tFBkMJKZOMEc5/St6llTOeCft16kPhjxbb6t4ITVZ5N0lvF5d1g4JYcE+2etTF3eh1SoWqpdCvrfxB0W0hXJBAI46kfhVRTvqZKDUjV8LavZ6vprXNspVQ+COeuBTlH3hVqbUbnyZ+3/APsC6j+0dr0/iDwf4x1vQ5buBRevpFzJF5hAxn5T1rSChOn7OTsebUUoTuldH53/ABN/4IQfEH4BeJ7D9pD4afG/xRbeKvDV/HqelahdXk0hEkbBtrbv4WxtI6EE06eEp03ZNtnVSxFaS5XHQ9H/AGs/iXe+EYpfGCMthPeaHFqtuoUjy3kh3kAez7l/CvHzSlUo4iVKpFxa0aejX3nu4KdqaUjyT4m/tXaJpej6S/ijWI7iDRNBheW3WTlvLgDysQORjmuChJU6kHbmSto7/pY7sQ1TpSml0Pxe+Iur/E79tj46eIvifqM7s9/fPIHkDMltDnEUK+ypgAe3vXs0KTnK0T5alKdesuZ6s9O+Ev7Ba3+p27+InnuyCGIMTCP8TjFROjV9tyt2+X6nfLDpz1dz7i+A3g2x+FcNtbW0aI6YCIhyqgdzXVFfV48qPaw2H/d2R0fipLq1+Jeo6ncxEw6oiTQsV4GQARz715c+b2zbPbw9PlopvcstGkUYeSVVUsPLcfypymki7tMp/EDxhPoXhB9H0d2W91H9yjKeVQj5m/LiuWrea1NK03Cjpuyh8MNOXTEhtk+6AB/+uuijyxVwwVLk1kVfhkRZeONQWMDC3rZU/WutSd2UrKszQ+LtnrKau8i6iskEi/LBcrlGyOlfP5mpPVHbTV9zxzWLaS3u3SSy8jn7g6fhXx1eElNt6HW4uJQk25Cg/jWSLjoQXMQljwVwQODQ52NJxU42McTz2M/yDKk81rBnn8jpzubWm3kV7EAxAOOOaicm3Y7VUi4k0qMhwOlY2Kg0MEy45I59aOZinoKuGOFX6HtSc7kR1FEfOfXpxUqxTjYGTnp1q00jWDWxDMAuPenza6EVb9iONA8gXcOau7UTOKRr+H9M13UdYtLHwvBdPqE0wW1Wyz5pcnjbjnP0pUaWIxNdQoL3+lhVZSjBs/Rf9gX9mi6+GPifSND+JYsbnxTNL9oj8OW9rC9xBkbjLfzgblx1EZYknsK/ofhbJK+CyiDxdrq7+Fc2veW78k3ZdOpvl2GlSw06z0j36/I+9/iTrUf9lf2RcKjrFEFPkttHuBjtX1uVYf8Ae866jyui1iPaw699T4+/aM0/wR4g1KK21fUrgzmZVg0+FSQ5LDHP19K/R8JUqYej7y0sfY0Y1HJSex9//CJBZwQaa8KobTw7axomeV/d9K/Fc1aneS6zf5n49n1pU7p71JP8TzrxhHcXHiSa1RVWV5HBkc9ADkBeOucD8a+owzisIn0sd9JXoxtsdd4J8T29tpk4a6ZGkt1ZwTuVZAcMV455/WvExmElOonbr+HmcOJw0pTi2jqbfVJG1sNd3X7uYoyDbkNx146HNebKivq/urVXOSUILDNRWqubmq2sEskiw+GvtaGEloSq7JST3z/nmvOpzaiuapy6/NHjQqykkp1eXXfW6NXxZoVr4h8Iy6TPpKzJ5astsGxhlwQAe2CBXBhcRLC4r2kZa3epwUKsqOLupfM4zTbnxVpcDwXOnysinLgRl8c9OBzX0FSODr2kpK/3HrP2NWdrq5pH4beH/FkH2rW9DMMk3BAjwenU9cfjiuCeY1cLLlpSukcFSv7Gemp8lftnWEP7MvjzSviP4W8Jx6hLpU5uPLlkVY5oSCJImLH5dyFxwD/SvVoQqZhgnJf1YbcsTQa2ufhl/wAFBj8LvhJ8Q9d8aQeG0fw5f30lza22j+PdLeWXe24QPDn7VGRuZSfKyAPTp8jjqMKM9Hdt7X1OBYp0XyuPkfIfhb4f/Ez9pjx9/wAJ7c+FbgWRKQabZWdq7RW0C8JEgAJ2qO56nLMckmvSyvLK2LXPKOhrRo1MRd1Op9afCP8AZ4ufBcUI1e1eJwxDCeLHzAZwQR/nFfVwoU8PCzVj1sP7KmktzSj8I622prrMAeG5tJyFkgGBuDbkYgd+OP8A9dcjnHmbserTbjqec/tTfCuS0+NDfEyLTVitvFltHe3aImFW9HyzfTcRvx/tGvJnXXtGkjso03GXMc+9pFpWkXl2Twlm4ZcdDg100a1mro6o1Gk7DP2ftNktvDOoyyoM3FrKM5xnKMa2xVe6VjGSlGm5JbnHfBvVrw+JzZ+XADFcENGwG7GTyOlaVJynTvcxwXM6+qPpJ40EcZCKCYxwvTpX4nxTJSzOSZ9Vy3AhgO4Ir5NkirkEGpGnqJcZJ4/SnHc3iyu7MvJ4OfStnqc83qSW0hcEe/FJnRB6EsbHJBFKxNVkGn5bUj35rePwmNP4
zdnU7Rnk4/Osps6VsUpbdxJkd6hRvuc04tMYyFSFLdqtRRpCyQ1+BuxxmjlRcloV7gEHcvpzimkjmd0yrKJCMdRVaFuSaI4EbPOcUppdDK+o+RDnJwfeskD3uQzDCYIx61d+xNVoj0/S7/WtRj03S4i80rBUUHvVRU5OxzTnyo++v+CbfwI1TwP4rstU8UI91cvIGS3Ops0cfH9wcZr7bIMC6NnI+YzGrd6o/V7w/Fd6/py2ptViW3MKoFXtkV9Y3qeDK8Z3R61rUX2nTltlIwkSggd+KIwtIzTezNHSbf8AtLwrBaF2Tda+UxXgggY4rOpuzncvZVUzzfXfCvibTZWisonuJUfAY8swzxWEW7Hp2pqHMen+F7W7sNBtra9hWOYRDzETopp2lJ3PJrVFKbaLc0g5ya0howWpz/jPT7/VbNILGzEu1sum7BP0rtockZXk7EVaSqKxiP8ADDwtc6a13rWgSlwuSuQSKudZ83LF3Lw9OnBWauz4v/bF8C6R8Oz4vvPDFm9rZ+M/Dn2DVJNOMX2y28suYrqHeNplj3yDacbgxGelc+Kw1SpQ5m/ka1KcFCPReR+FWo+K7n4Vav4p+C/7HereNviH471e6ltt9xpEyDSy5Km5mLKFWRUIC4+UHDZ4wfFoUcdiqyio2S7dTKriKGGTjTm3J/h6HY/sq/8ABG74qeFtOHiD4jaXJJrl0PMnSLD+XnnaCepz1PTNfW0soqUaXPP4vyOVOCak2fQXjn9lm++FnhOSzutJMKTL5Rd0wd2Ox7nPavLx1KadlqethsYpRsj5rbR5LHVWgn6rJhmPPINfOzk4txZ6FOHO02cp+0HdxtrOnaej58m1yw+teDmcrzSPawSSOCebbHgfhXn01bVnqJc2hTubwLlWfA6Zz0qatuhpzQp7m9DqHyAgZyKH8dgrt+0dizBqCEcHjvms5SlYISdyxHeqOQ4pcztqbSd0SLqTE7Ff8QKV7IyT1uWbeR2IO/I7YqJVDp5rrQsQswOAx/AVLndGMm2yZCx4Gc57VpBqwQTvoXIAVwMk57UTlG5tyK15H1F+yl+zpreoahoviP4uxw22lCUT6YureN1hjjU8iQWqbmP0OCSa+jymlWpyjKXy12PHxU0r8iP24/4J9aR4Z0n4eSQeHrewVQqjfp9rLGjjn+KTl/r0r7Cm4+y0PlMxlOcFc9jum0KP4iQTSeYb97RkT5jt29Tx0zWftLVLI8tOahZbG3IkbI0TJkOpDA+mK3spaMyi2pJnD6Np50i9utGS1KW16GTywPunsaxb5Gek6iluc/qvhDxbP4ji0WxtTGjOB9oxn5R1ye1bJ80WxSlCCvE9O0nSoNE02PTLMfKg+YkfePc1NPm3ZwzrOT1LCwrKNsqAj0IrSdhRcUtTgv2iL7whB4Bv9C1G2hmuLiAqkK4yMjqf/r1eGjUnVT6FqpBM/Hj9s3wN4d1PwmfAvj+21yyGkm4Gg+INEs/tRS1di5tZ7fILqrsxVlORnBBFZZnhXODlJfMqjXdOrzX0Z+a/xf8ACvxh/aj8a3fwu/ZZ0TxVq8d1PJZal4j1LTP7NsY4s7JUUMSXOQVPpyAD24sny/F4uV1H3ToxWYQqL2N9D7U/ZK/4IL/FXwv4AsLOfULSyZtr3T3MZ826kI5OOwJ6Z9q+1p4DB4enZuzPEninQleMTvPHH7E2vfA/On+IIXSOJ8AW6DdK3ORgkGvMxVGCTaZ7eX4v226sebX+nW+hai9vHG6uzbc3CBWUCvHlZM9+jVktjqrqPSNR8Iw6xfRI32OQASFQCUNctZRcT0qVSdrM4/4iWVnYT276dOxtpZ0YKTxwNx+vGa8upeM7I6Hscne3DeIdVOoFQFLbYVI+4g6Vavy2ZMYupO7Oo8HWxS9RV6NtJHvmtqeh3QXKjF8EokPxG1VEHAv2z+ddberOaGtVmj8Zn0ufUHsdV19rMmMFFYHa3HHSvBzCtTinzM9SlGaSseM63a3tlOVnvBPEeY3STIx/Ovk671bvdG03KT1MuRskH865k7mlPUZNIVTcvTNQ4sJvlZThgW5kYOvBNWrg4qcQuLG505xLAuV9q05YNHJKnODL2najHdxiKXg+9YtWZcKj6j7i32negqXF2OiLU9wgYdCOO/FLkdhJWloTOP8A61Q1YptsYq7j0pBFNu5FcwgkDGPwrWmm9RzbejC2tSWGc1U5WQopQPRfgf4S8W6t4rg1DwvrV1pfkv8AvNQs5RC0a9yZWwsYx/FnPoD0r3+FctzDH5pBYeXLrv2+fQTbnI/Wj9hH4PaD8JPh23j+6i+XVWDNfz3DTXGpSY5fc43bffvX9DUaKwtFYOjNye7b2O6vFzisJh23Ldt7I9S1fRNX8bpNDZWjW1pLktI/yZX6+le/hsVRwSXM7yOuFXDZdBe1lzTXRHhnx38FeCvhrbN4o1TVPtd5boDbLv3bCDnjn1r6bBYrEZhBrlskj0aGJq4pXimkfWvwe1ddZurK/knIGqeHLaRWI77OgPc1+ZZnSVOhJL7M2fmObUJxw7VvgmzkviFLNb+L1MirsjumKr0PmY+U/TIz+FevhtcIrdjspOKoLl3aI7XVo7cvK9+JEJka1lzjavHt1Y960VJtbev9eRE1z9DqNA8RSTahHJdxmJoViEfltkAMOuPXPGK82vRUabitb3Oerh0oWXU2f2lfEXxi0z4O/wDCR/BiLzdRtHjnubeJN7ywocuij1xXiZNQyueZSp434XdJ+b2PmKeGoxqzUt1sd98Evij4f+MHw803xzoEoaO8t1M8TDDwTAYeNweVZTkEHmvBzDAVsvxUqNTo3Z913PExMZU6tpK3qdRNFBDL5rsqg9sdTXKnKSsjNczRU1zxFpui2Zubp+gwqqMkn8Kqlh5VJWiXCjOZ8Y/trftCeFPFemy+HNY0O4tmt1LzpPYO5liwc7SB1HHPNfZZdhpYKlZSumd9pRo8sWfjv8SPgn+zz45/bkfxLr/hi01GE+CNYu4UvbZXUSQrCImZWUAsodiCR1rnlg6FfHc0oo8itQXOuZ6s+x/2QIPgb8PdPn0nwxpV01xJpkYkTR9LSCMQuCG33LAhc8/KvJB4x39uo3DCpxdtbGs5V+fl1Vl5nTfEL4c/CTXvC1/rn/CJWssUMyxwO8TF9PbadzySybQz7SQCpz8wGOTXnyrus/elsdNFTp8r1PifxHo+gQeJb+GG5byjKUi24yWBxu75ODXBWqKKsj6rBy54ps8j/aE8Xxa14q1XwYthB9i0fyofNZT5jXGwM+PQDIH4V5FOnKVVzvoepGbcfQ8H+JN5/Z+hDR1/19421hnnbXo0YvmJ5k2dR4KhtfDmg2ttKgzJYzzuvfaE2/1NaVlFaI66qcIKJwHw6isbrxNHqEdnFE3nnbKjg5Gf4hiuqMHOnoY4bljWXmfQJUskZLf8sx/KvxHiqPLm80fRqOtx4QEYxz6mvlOpDWoAdz/KrkkVFK5G/DFiOPftTitDWySKt5IAoGKd9TkqaMbYuGGcdOvFEnY1pXLKOST2IqFIursQ6a//ABM
yfeuqPwnNT/iHQykYAPTHUmsZL3jtvoVrmRIxg1N7GNV6FRrhJOMH6jtVXFBajZJQAcjt2obVjUp3E43dPpxSTZzzi7kSSFzn3pOTIaW4oPzZI/OldsmyEdgCMj86aTYa3ILiZSOh/OtY0+5nO7WgaBGs+sxRTXNzErOAWtP9Z+FdNH2fP7xjKlzKx+mf/BLfQNO0+Vb7Tre+JBG6bVLre59wP6V9vlFWnCFoo+ZzJRjPlkfp58GrZdWsNS1QQP8AuduJGH3yPQV9FBp20Pnql4NI7aC8W5shKzDcGwW7VUtNTN6PU1vCWoRrY/ZZTyJSFYDg55rkbfOYVVzamhPbAzqyfKASzOAM/SqskriVTmhY574x+LfFPgv4S+I/GfgjRF1PVtN0S4utM09wSLiZI2ZEOOcEgDjmtaKjOai9jKcJ8ra3SOZ/ZP8Aj5Y/tG/BjRviC1xbx6ncWaHVrGEFTbz4+ZdrEsBnOM1ti6McPWcU7rozKlVVWipbPqj0pbcA7v61gp9Acm2c18UvGtr4X8OTRl5VmkjIRolJIrqwtLmnzPZHTSg0uZn5u/tnfF6+S4ns7qVJLaSNw85GyRW/usp4/GvUdKElfoROVRq58b/sr/HHw/8ACiP4sG1sNDmD61Z3t0LuKMXFw00XkxIhZl3nfEw8vByX6jByZfVo0K7ktP67nPLCxrQk7a/ofYfw0+NXiiy0aLUPETBNQubGGa/kitk+QHlbeNQCqD1GQT1Jp16k69R20QUoQpxUJanjf7X3x2ufiJr6PNdpONOgHlRKqCEud2RtXgkEjJ5ry8TdSaPRp0YqHuKzPgXWpLi+8S3bSRIjNqUmEj+6Bu7e1fJVtcQz2qbl7Fdzyj4xXwvvH9yqtkQqsY59BXz2Yy5sQ0evgo+6cpc5RMjrXFGTasepGSjuc1r1tf3ZZDIygn+E4r0sN7GK95anjY91azfKz0ez0eHYoJ7V5lrzPoK0OWbRPHpMG4LuH51bWgqagi5Bo9qQcn65rmqSd9Dfl7jk0WISZHQds1PvSVhOmmrotxafEFAUj603BEqDLEFgpJ46DpmsnEtwSJo7SMHORnuKtaItJRWhaW3DMqQozFiAqKMkk9sU+W7QnCUtz6i/ZF/YzttJ8Z6d8T/2lp5PDel2jJdWFj/b6x3sxyGDeSm5wPTJX619FluErYeoqlWXpqeVjOSHuwWp+3P7C2s+ENX8DGXwjphsrAIBp8LQyIzxDjexbhifXJr66FnT5ou6PiMdXqVJuJ6wmnawnjkXiaaxs/LbdctKOCemBUqE+a5xy5eS9zakYBgM/Wu6EX1OZao5u61W2bVVaNCGEnA28nmspwbR1wi2kmdMi7wH8vDd+OaINNamVRuN0hUkhaX7P5i+ZtzszyR64pykoszVOyuRXU5s43mZSVRSxAHJxWisxqKtoeAftAfFXQb21mWXT3hdFI3sACwHqa9nD0404bnM0qj0Phv45+MPCPiq0u9OMlylyUPlz2jRllGDkhZFIJHvxxyDW0+WUPeV7F1V7TCundxk9pK2nnZpq+1r6d0zyv8AYj17wreeCNDisLEvJYa5r0D3slggmwmo3BYHawCnAznGDgVpl01Qw1lojGpGXNbc+5LD46ab4a01tK0nWrtLeNRIMLiU55AMr/KOOpHHYClUSxDbRtFXS5jy74o+OvhlrZuPFWvTB5GjJhvLW3e5vRkEbd5H7vOf4RXJiYRhC13byNYpQkmlqfI/xYHgG51Yz6ZaxbsktMyyPK2T3L55ryK8aXPdH0OEqzVNJo87+JfiWWw8P2fhXTbIyXGs3qRWNqpLExqQXdsYwAO/qa82rJ8p68XKVuQz/iR5UDW2iwTGWWODM3pGduMfz/OvN1lM9R0mkmzF0q1WMoWUhV6cVq2mh25WdT4QjH2pTt5G3knpzVwk0bI57wBELv4havP2a/f6da65PRmdOC5ncu/Gy68P3N82l63bRHZENkshOF9M4HAr5vNFRatM76bvojxXX9ITSrhjbXMEkLHKm3n3gV8vWoOLutjWVkzJeT5sdQKzSSRvRI53Owrmk9ya25HpEm64I4+9Td0kFHU3VgilQowyCO9YSm7m9kmZWpaRLat9ptRx6VUZqW5z1aKesRtjqQkHlTcMOOa0SZjG6LMcY370PXtSafU2jPUmJXbyPxNS6aOhpNDl247e1Q0ioWK8p3SfjxWkXZGdSykOR9rAAj396h23ZjrM+iP2VdI8EfD/AEBf2jP2lNRePwhp9z5fhbwjG5WXxJfKeWZR/wAsIyRuc9+Bk5r9C4TrUcrofXMVPlp30Xd9/wDIuFJQblOVkfe3/BOP4w/E/wDa/wDFniL4v+IoZx4d0yRbTTbRNM+z6ZYooG2G3JbMjY+8Soxxyc8fouS5zUxkZzcbRl8Pccs4wWHwUqNL45P5vzbPor4ka/cfZJ7Nrl7e0KbEEEO5nPoq/wBa+4y+jShadry8zpyvD03OM2uaXmz50+M3w98R+IdJku4tNa2UISlzKSZc9ic/dr7fBY2jTsoz18j6aM/e0ex77+yT4rbU/gp4W8RXMonuNFdtPvnUklgjbd3POPrX57nVK2Y1sPf4tUz4HPoWxlWhH7auiX473dtB4qnv7ObNu8QnhcrkEgjP6EitMtjL+z48+60Z5OAjVeEip7rQwNb1+Cd5bmFPKWGCKKEquAA38VdtCNlY7VFwjZFzS/Hxg1GeQymJkktgzk/M59vY1z1cPF7rTUzlGbhqj6E8AeMLCy8HWuua9dqsThw7lDg/N1+lfDZjhalbHypUFrofHY+jOriJQpox/EEegfADX5fi94fhjh8Ma7KreJI7eElYpm2rHd8H5Vx8r4HQgnoa55SqY6n9XrP95D4b/iv8jhjRqYuLpz+OP5Gp4o+MvgOQrHc6vtG3dDcIcrgjO4HvSwmX4pq6SJ5PY+6eU/GT9pC103w9dWvh/XNOntxHnzdRmKgk5OMgZGcdjzivVw+BhRqKpUVn5EKTpS5pH5zftq/tK+HdEsG/sDWbaLVJbdw1zpfieQxnK/cZByM5IOK9L2lotv5DbqSal0Pzif8AaFmsPj/qXiK2uIJH/wCEA1yFD5zMzNJHEqjLZOc4x64rw3mUqWNl2scvsqlSaklsfRHws/ao1nRfCFrc6fZ2dveaaIH1TVbi4knuVhkKoSkchMCheB8sZbDc9DipY2M6fvt+h11JVZUnyRV1/wAMew/Fn9tLSvFOiR6nc+LvDV87QlHm1Ce4nnjOAFIhOyEH0wo69DXR7TDRo+05rCoU6z+K79DwbT9Zm13xRP4y17TLaOxtmWSaeC0VBLkhljRR0ZmAAUfyBrwsbjqMZ2jq+nzPoMthJ6K9j561XV7q/wBR1jxZ4nl2td6pcXLoHyAzuSEB7gDA/CuykuWmj3lBKNjzuKzvfiF43ifYSpkxGo6da66WiuwjQdR6Gudeh1rxH4lmt5R/Z+k2H2G3cHglR8xH1bNdMlCPvMzqVVzycfQ4n4PWuoHxCsioZojISzhOF5749K0hUS9Dpy/Dy51KR9LKCsMKMB
lYVBx9K/EOKqiqZvUaPfnK7AzdMDNfJPQyb1FLELz+dBa0K8rtuIB/GtOb3S7qxRumklfYeBQtjLkV7lmxQRqCB25rOzkwcrEyZy2eKd7GjV4lbTyP7SI967KbTicqvGZ0KZL5b8RWNR2OpPS5FeQK4+8ee9ZczuLmvoQpBGqj5RnHXFXZsmasxJIUbjAxUy0JUtCtJaxcEoM9qEmUlcrTKqsBmrjTuZyVnYaAmOn61Xs7AoXIbh15UduvtTTsxONivJ9zceBitE9DCUlHYveBdF1jxD4kh0vQtImvZ5JABFEcd+5HStaGHrVanuowcpPc/W3/AIJyfCbxB4I8HWx1zRYLGYqGUMc7SR6k5zX3mV4atTprmPlMzVN1VJrbY/Qn4Ah7fwbfwyXpuWMpbzCOOnQe1e8k9D5+o26qZof2itvpU8e3aRJ0IrZ2sXNXbHafr0WnsISxHyq3B4zXLUSvoL2aW50w8V2r22T8pzgZ7+/0oW5P1ZJ3RY0nUoJofJuJFA3FUJPB9qbVnoZVac07o4bxn4B034V3V78Xfhr4Qi+1JEW1rTbKIKb6HqzIowPNHJB78jvTqVbw99mEYU27vRnlmn/H3w74qibxL8PvGC31nKx3LFdkSW7A8oy5yrA8EEVrQeHqRST1HKNNvVnEfGP9qTXtL0WW3kv3uVKkeTcWpcdOxrv9nyx902UoqnZO5+d37Ynx/j8ZWNzpVzaTfaYyXRnzDJEBztB43L7EUe09nBqRg2pRUZadT4i8AWfxL8PfEPVPF2p+DkntNUvrEacuosDueATyhsN90E8An8K8XB5vSjjZQT09DXExqTpe4tD6J0H9pr4pz+F7fSdU8D39nDFOzx2sOLhBIwAf5+p3bV+g6V6k8xp25VPRamVKhUlJe7oZHi34tWGg2F9/wkWi3Frc3i+ZY2c0JWTcGz0zkIWzycdBXnYrMqEYe67nsU6Da2PHbXUvOun1O5xvZmlfHTJ5r5+lLnqXZ6UaTSseJa/dy6jr17qMv3prhiOe2a+bx01PESt3PaoWjTVig+HyB1GODXPCNlc2lqrlC6giydwq6k3FaGMaPOtTro73YgxIc47VlzLmtY9SvzObshIdQmmkKjPvVOcYrUyhF3uaFpLJgbnOD1rllNNnRzpGjA5zkt+IqeawKoTpK2QF/GpbbJUtSdJQq/UVPMzZttD1m3jrVRd9CYXvqWYWOQd3Q9abhK1ynOTeiPoH9gL4I+FPip8UYfEF34inSbTrob47i8eaQuCCHhtV5LDIAd228n0r3crp/voqbfc8bMIq7XU/dj9jeW+0zSDo8EdxIjDMtxqt2HupMdyi5CgdMV9fR9mlaGi7Hy2NjCUOaW57lq3mJYySJdCEKhLSEZ2gda61NJHiSvexFBcw3drHeWz745IwysO4x1roi7xIs1NJnFNqztrUdxOwAEozgYPWsOZtM7qkoxVkegShkJZD370U7uJ58neZj+J9Zj8PXFrr9zb/AOjBjDdzqP8AUq2MO3ooYAE9s59a1VP2qaT1FaSnGXTqJ4z8TaRoWhvdXd6i70ypDc49R60qFOdWei0QsVJ0qbS3PjD9qT4teFjpt/GdQeffGylFh2BcggMrA9QecV7MZRpR1PNpqpLbc/Mb9pv4030EMPg/SL6yeebUvsrz6ncAtEXPH3F86QgDJVUIGeetcmIxlOOkWbfvFWje+v8AXojyH9iX41z2Oi69qOibml0LxjrC290mnlHnzM0pJaRwIwFdiMDOQMDJJqcvxMbSVSW/Q6Z+2rVn7NPT/hz6Sg/bL8N6RDAdevI75Cp+xGC7div95XG5grDqCVx6cV6ixNJWUXoa06dSUXocv4q/a+0nV7mWPw3rr3fO7ytT+Rkz/AHi6jr3Fc2NrwcfckdlGDejRyE/j7Wb6Ftd8UX8Wm6XvBeSW7YrKeyqvVyewA718risfBStfU97B4eoo+87JkHhiz8QX3iO68feOIysx/daJZxSHZBbg/Ljoeep9Sa4Z4tTk79D6PDYNUZXZPe2slxdtczJvaQ5JJ5z71jzpu7OuWhZtdODyAytt8tcYA4Jq3NWMuV3sdD4atFWVpwm0BCxOOmBRGqrmvwo5P4PYuL++1A4JlvHKk9/mNdPNzRu2c9GTnJlT43Xj3esy28tjDMVTCiUYYfQ968LMaiTs1c9GmlGN0ePaozRMQIlTjgBQD+NfPVJXlZbGqTluUBLvYbhjnmsJOyN6SaG3TbYyc8is4ybYqi5loM0kgzlipHPpVSk7GdJcstTobeWMKA787RWMnc6ZSTRMHhkG1iCD1FS/ImMkY+saMgYz23GDk4ropVOjJqUlL3kUrbUXgPlynB6c1s7PY423B6k5vGkOVb6ipem50U5uSJYblig/TNYzRvFu42Z2A3Dp9aqNmjOd3K4sEpDguM89+9KUlDUum0ndnpvgWPxP+0p8YNC8C3a7lkgisNPtkQ+VptrGuXdFyAuFDOWPGck5rry5182zWlQd+XRWXRHNjZKtVSvp1/zP1B/4J8/E6Hx94o1T4ffCmJ9N+FHw8g/s/QhFknXLzP769mcgFyzZwOgr+j8Bh6WHwKUYe9ok7W08go4WhHAyr04pym7J22S7HuepXt3qusy6pf6itpaRPtiWNMs3twOK+koQjTpKCV29z3qdCOGoKFOPNJrU8/+M+iXOt2M1rYXC7JASsVuSh6clsjk172VSo0mrqx2UFNQV7pkn7Amu6fa6p4s+EF+wt47hI7izWV8lWYYJ6fLlhn8q8ni6nKHs8TDVxetux8xxHQqKUMQtWnr6HafGb7PceCb7T4yBf6NJ+8GOqucPx6ZOR7NXk4OVXnU38MzwnOVKqmlpI8S8HfFNdU0W50O8vI2uLC9azuieC3B2Ng9scivShUik7dDopzc/eNDTPH1sYDqVzcxurWKRMCeVdHwre5x/Ks5O6u2aVHdWeh2Nn8eb2902Pw0+qStbW7yIYomzuhzuY4yDzgDJ4AzXEo0Pauajr3PNWCi6vtEj3/9lH4j2vxn8A6l4U8Y3FrfId0YsmjBH2ZhgKwxg8dfrXx3EGFVDExrUVbu/M8LPKUMNVjVo6PqfKv7TXjRf2EvG0ngX4zNfzfDi+l8zwx4itImmn01GzmCYAfNGh4BzuAx71lSxVWdB4n2iTi0nG2stHrtay66p3atdXt5Ek61L2iV31PDfiujfG3Qn1v9n740aL4ismUyI+n36SyJ32tBuDKce1bSzt1F7rsa4eEa6sz4w+MP7IXxq8ReKDrty95BIzg3U1npP2ZFXnLF5GWOPsSx465715uJ4jqyrXhpLyVvyKq0FCm9dEfKHje7/ZW+Hfxy0r4TeJ/F+hfaLiwuoNb8T6PfSXsNncs6+THcXCHYy/LhvJBVMjkkGvMoVsWqkq9TVdjCPsakoQi7d2e8+Dv2Yfidqeltr3hnxvpGtaZKqeRdWutW0ttHFzja8bA7f985FZLM6bqNuTSfTTT066+bflY7Fg40na6a7oZrafBn4N3Ij+I3xE0KfU1X5dK8K3I1C9unPRcI7Rx+m5ioA
7GprZzKp+6hDRLf7/Pp6W9dTsjhacVGSmvQb4l8f6lB4Cfxv4mtE0e2kV4vDGgxPuNsGXDzyMf9bMVPLHgdAAKWW0qmMxKb2R71Cly4dpKzez7Hzb4p8VXHiK5NtaMVtl/1Yz1r7SUYRajHY15JSaNOyvV+Gfw61H4gTri6aI2+mK3VpnGN2P8AZHP5U4uM6igjfEVVg8M2t3ocl4bB0f4T3LEkTXsoDMTy5Jya3lLm9083D0X7JN9TofgZoZ/t1ZI5GETffXOVI7g1hi6lOjRnVW1u/b+tT6KjenTWh7FdXAWQBRjI4r8Fx05YjEynLds1i+ZiJcAk579/SvPlCxooakjTcYx9aOVWKqKyIfNAYk+tKUbmClqQSj94CTn3pRibxaa0LEMirFyOnfNNqzIt7w6NwQ2DUpXZpK6WhUsWP9pHA79a7IJKJxRl+8szfE205/OsZq7O2zURlxNgjBzmoUEZJ6kLznnHHArSw6juiLzmIwc89aTgmKGqFEuRk/kaFCwTdihcOxfdyKpWQk0xpnVI/mOPxqlqTVbSuilPMWk+Tn2FaKKtqc/tVchmlYoQDRZRG7M1PAeq6zY67FFo+rT2hkcBntpNjH2zV0Kk4VUosxqr3Hofrd/wTk8Ea4ngu31rVZ7tmkUGOTUtQaUucZ6HgV+g5e2qSbe58Zjm3UaXQ/Qv9n5oJfCl0kF4J9spV3Xpn0Fel7W7sePWvzIg1HUrex1ybSbx8eaepGPyrR1E0dTp2ipHN+IdcbTbyaymnYHAKlTwQKzbj1MpN30K3/C17QWkZa7aMn93CAfvepq3KFr3Kg23Yqa18d3Fn9isro/u3CqwPVvWl7RNEzpSk9D2L4T/ABN0b4j+GBPHdIbm2AjvUJ6HHX6GsXNSumcNek41LI+Cv+Ck3/BOn4mWviy9+P8A+xv8QLvwd4jlXzL+0tV32monrmWI8E/7QwfeuR4fm+B2ZFRNwTSuj8t/j1+2p/wVE+DZl0H4hfBjStca3Yg3drqE8Kygdcp7+xrSnPMKStKpp6XNqVakqcm46nyz8Wv+CrP7VeoxS2rfA3QNGuTlftlzp813In08xtp/EGojCeJm/aVG122OaeJ55e7FI8I0X9tj9sDRtc1bXLH4kX8lxrdxHLqEd5p8M8cjINqbY5EKoFHACgCtp4fA0oWsdNKVaM3JS1O68I/tVft4/E6+XTU+LV9pdvI4Eh07T4Ldj24KICK8evTwcHeMb382egsbiKiUItfcfR/hTwjdeA/h4V8S6vd6nrmtOs+p6jqVw008gH3QXckge3SsHGMIXZ30YyS97cz9ZvW03Qbq83cLEQD9aaqKMHJnarxPI5JGkUue/NfL1J887nrUY3sVY5gC27045rdK0DWdk7Fdmy2W6A96wqu6YpS5EdS0UXKgj2Oaxu+Y6pSk3qOtBHGQcd+1aODa1FLfQvwHeeBg96lxikQ009S7a5B5br0rOajYpWb0LcQ5GTj8Kw3NoxsWERGAyuPSk7o0THKAh+XpWlNNu7JfxE0fI2g9TzWsnyq5rA+lf2N9W/aE+KXj7SvAnh/VJfC+iW0aJc3GlaSts97GP4pbkrtiXGcyEkkkY5NfQZfiZVlGFRWR5uOnGMZt7pH7K/sd+LrTwW9t4D8MTwa9eABLq4sbhvs8Xu8jkmRvc9T0UZr66k8JO3s0fn+JliHRftXrd7dr6fhv+h9V30TzWbxggMyEc9B/9atGrqyPOjzXuYng26mn06TTrtwZLeQ7QOMoTxWlPSKNa+rTOE1G7W18QS2pbmCc/eHQA0tIvU2nSbtfqeiWHizS7yyS48zLYxtx1pXdtDGpThCW5Pca5oJj8i8njCSja6SDIIPUGp5mtwacFfoeEftSeK3/AGfNEW/ufDVzqHhW6DGO6tB5kulv12hD9+LuBnI6DIwAfXKlGWuxzypqpGx8D/Gb4/fs/fEC9muLn44eGXitcypb6pqSW8tuwzx5b4ZDz1+vrRXzegqVm9SqWFipX6nwF+2r+27+yb8Kby81nwH4qt/F/jMqws57JhJ9nZs5y4yFznBbOcE9K8mM8XjJpxVovqeolhaFNyqvmk9j5m/YH/4KjWX7Nur+MvB/x7+HUXiLwh47v/tt6kEKtPpt108yMHqNuARnPyjrzWuLwGIqQToys0uvU5cBVVOrJzWjPYfG/wC1/wD8Ey/ER/4SHRPHusadIpLJZx6Vc7hnkjaBtz7156/t2jLl5L/NWPbq1srcOWL1fkzzw/tnfBy71GWx+Cvg/X9fnY/u7zU0FvAvucksfpiitPMeS1SVr9ERTqYSmrrVnpv7P3hXxz8ZPHln4j+Id405hYG2tFyILZf9lfX/AGjzUQoqjTvJ3OzCzq4yqkfRXiaKO61VkhGIoVEcYHoK4PbWkz63llcoHT3kcKFPyjog/nWkavMHs22WbfT1z5iLgdCCe1X7S2g/ZstapeR6B4N1bWpBgR2jhcnuRgCrpyu7owxMuSkzlfhFA9hpUDOvJw7j1zya9KP8MwwqtT9Tnfivqr3uuz/ZoTNGrfKpQ4x/vdsV89j1LmPQj0R5lq0e+UkQeUM/cDZxXjyVjqpqT3KKQFWyV/OuKbbZo5qOgTwF1xjr0qEpJlRtJDILdo3JBxzWiVtzGpFt6FwRyf3jg1nNxvoVGnOSJI0m3cN+FO0W9iuRx3JNzIMSE80+W+xUblHVdIE6GaAYOO1EZpaMmpSUjLtZ5baTypuueM9615brQ5OZ0pWNCOZSodB1HbtS5dNTpp1eYfvJT5jyahKzKk1fUWPAIY8n2okoyMryk7H0H+ylolp48sz8LvhdM2haprFtIfiJ8RNWmWOPSdIzhrK0Gc75R9+T7xB2DA3E/oPAGEwlbHtU1ZpXnJuyS7Iyhga+LxaS+Fb+h+kP7G3jb4DWdtL8AP2fx9osPD9nGLq9ZSrXTEfeGcEj3r9iw2bZVjsRLD4eon7NLY+kqvDKneEl7uyWy/4J7J4pv9J8PaWJNYgWN0/1UKMDz+PU17OCp4mu09F87+nRdPI56Mqteq3Td13OKe81nVbOR/DOixQXDxuVuLkZYg9hj+texGhGnL95PQ9iUKcVzTkcT8MfAfibwV8WpNV8SSu/9tWTQXN3EMbcZIK89Rz69q6cd7OvhLx1sePmLhVo3h0O1+IGt+Kvh/4ohHxGgim0bUoBbS6qsTEyRsvyNNgYRh03Hrx6GvnIexqx/dvZ/wDDnyWKp050eaG/b8z5t+M2haj4E8TalqOjzrJa39sClxEeHkQZjfI/vDinVi1ByicFOtKy5tzg/Cnxy0zxV4du7eG4VLizZRdwBjuWRWLEEdhjvXlyxU5UlrY7m3V96S0Oi8J+Jr7UP+KjtL0RS3xeO0t3l5C55GOvPr71lCspov2sYR5Ue6fsv/tCah8JPEo8YXMIbT5YkiltoH+ZwAdzEHpkg45p4+jDF4Z0n8jzMbgvr1P2fXufVfxn0H4Hfty/Ai/sbC9sdVit4v3oDqz2rlc7W7g4r4aNOrgsR7OotGfKvC4jLcT7KstGfhL+2n/wTRj+Gfiu
81f4dape6bJNO6wyWE8kLZ5PVCCOlXWwNJz54bF1KahOL/mdl9zfy0R8E/HT4Q/Gn7U2k6/4/wDEl6gBBjvNVuJUIHqHYiuBwo4duRjPLpTneXU8r0z4E3bTme7jllUNggqRz/WvOrZgmrx2Lp4ZRjaKO68KfAOW4YQx20uwj5kQsc/gOtZ1pxp03MqjgJVZ2sfSvwN/Zw8MeA9KPxC8ehbPS7dN25kAaRwOFUHqc1hhlLFVOWC1Z9XhcupwpKU9Ejgvjv8AGa5+KfiR3tx5enWv7u3tkYbFQHAA9vf1r7vL8NTwMF1fU7Y3c7pbGJ4H8K3euajHAybF3AvI4wEGMkk+mK1rT9mvM7KVJU1eRh/tB+NLXXtTtPCWjSj7Bp42QgHrz8zt7sf0xVYVypxv1Z5OPar1PJFbxnqL6T8OdKtbUH95eAsq9cDvXo4WknK8jZJqlG2x6X8G7i1t/CD6/MpiaJckcDdngV8/xZioYTCOC6o9RSSp8xvp4qguSiqcn0r8ZlCUtRRrJy0NKDUkljBUYzXHN6ncpxSHnUVIwevY1Mr9BN8yIjfEyYU1N2tzJRs9SRHdmDMRU8zLUorYma4AH0pJtsTl74sE4w2eapuxstUV9OuFfUCP9qt4ytE4Iq2INua7Ctg9az5tbnouSsVptRQfKeRziqTucj+Ii+3A5yO1KUtDZpNDftuME/hQmKNo7DZL7A5bH0puRFV3K0l3vOcggUr30JhaOpm6vdyeWQnB7VpTaT1McS26bsUYPGkNlF9lktFLkEFiM16FOEHG7PHjVmpixaq90AQmBjnNYVeVXPRhVujsvhHqsWneKIJx4dTUHEqkQsM55FGHjJ1FYzrV7QaP2W/YLsvHvjD4dWUmqaQlhDIoHljA8pcf56V+h4CH7hJ6HxWIn+9tZu7+4+7PhH4ctfB/hBdLs4VRfMLMwz8x7mulpQehyVoxdkVfin4WfXdM/tDSlH2qA70VerYobSVzak/3bjI8T8deIRrmmmz1J2tLy3+XBOG/+vWMql0cvI0zyjXLjxC5F5bTh/JBVQxwAO5qLu9zSPKloYtn4pM2ryi7ldF2ABi3Ab2pqTUjq5F0Z0vgD4wX/wAPYb2eGaWF7y2MTrnPfhvrW8KkVHXqU6Kvc9p/Z/8A2ltP+NOgXfg3xfFtv7Fdtvc3Q2i6Ttx60cvNH3NyK2Fpxd4bHgn7Y3wg+D3iCeaK402E3MoYFPKUgt71pzqKtM4p4GcldH5cftTfsxfD+0W4uLXR40lIYPIYlwpzwF4rL2tGCbSJhQjBe9HU+aIf2ZtIdSsmi7J+GMqxjO3tnIry604yeu5ccJOqrLQ9C+HXwK8KeALZ/FXiUJb2sI3IHUAyHsAO5rzpTtLfQ9PD4GNCPM0UfEPiKTxRqb6llRFnEaDoF7VjKU6tTyO2KSdzjPinrqx6XHolq2N/MpHpWeYSlGhyo6aFOM58z6Hnk8jKvHT6149NRbPVprQqsx5x/KuqTSQpNN3IDvGd/wCWKxUVN3OWvPmWhvrdSKgGT0qHGPtNT0KqlKbZc0/dJjNTOdloCk+Y0Y9wwfT2rB3YtWy3almPtniplFo0UWnqXY9w7moHdonhV2G0ZxSdi4ptkgjKrmqpu8i7O5Nb7R171rKN0Lmktj3T9lHxf4evfFuj+F/EPjbUGje7WOLwtoVjtfUnP3Y5pAOSc4DEjGevFfQ5ZClOMW+x4+NjXndWP2Z/Y1tZNF8N2baD8M7bTGtDvW2tY/tM1uSMYZz8olOSCc5UZ9Tn6+EYQp2hsz5bF4eg5Rc0m4u6v0equvOza9HbqfYFjPfSaGk19hZjFl8cgGhS7Hl1JRU2oHI6Lr9vpPi4QyuAJ22OxOBz0/WoVVxepuoKpTZjfGrS30LVV1+JT5NypD7ezgd/w/lVO/Pp1NIS9rRXdHBad8SLm1FxpTXIURYdW/vL1rdJRhczlTVRi3nxKbXNTTTtNu2lkBAZWz82fb0965/a8zepo6bULWPYNY8N6J8VvhbN4R1r7Pds9oElXcG2SBeDx0pWTXK9zzJxcJ2Z+Kn/AAUu/wCCXfgzxTrF7eTeHk+07nK/usY69M81x1sPTeqNY3cbH5W/FT9gjSvCWrSwweesYLD92SMEdauOJlTp6O5ssLQSvLc4T/hl7w/DdLYra3TTsPvzOQn51yzxeKqPV2R2U6NOUdi34e/ZW0tr0C608EpzIrAkn2Fa1K9edOykawjQTase5fBn4HWFkYYLTSVGCF+RdrLnuQa89SjTd3uCpSnNJH2n8Gvh/D8PfBj6zdq32mdfLt/MXDEetefi8ZOoz6vKcD7Gld6ssRafJdzl1XdznJHSvPdeN7HvKJMuiyKmQCDgkt61Ua9tiuRDZLEb8bBjHIBraNRyZPKcn8Z9RQaVZeDYGG+8mElwAeiL6/jXdQvJ2PNxiUmoh4YaOxh+Vc+XH90cE/SvZirU9R04pw5Tyz4matcz6pKZZGKFvlilG3zPy718/mLf2TqppU4qL1ZxkGqrfXX2QwmMhgPKccr+PevFlGT3OynzN2SK3xF1238B28LXvy+bjBJ9aqhgnWg2uhw4/F0cHJKT1Zj2nj7TbmMOLgYx/erJ4SpF2aJoYyNTYtW/iS1n5jnUjPauapFwlY7o1YWLi+IrZV4cGseSTZcK8WxV8S23QuACexrXksgqVUoit4gtJG2+eM9jVKErGUKybsi3baksiBdwPuKxqRszqjbcp6tbLIDLGMH2rSlUa0ObEQjNe7uVNOvst5TDpwc1q7WuctH3Z2ZfGQMqQeKzTTZ2zXMtCSBGLH1Papm0OCstTrfB3i/WvD+mN4e8JeHLWbU9RuVjiujHJLO7H5UjRNwX7xzwMk98cV6OXY+vh4So0IJynp1vr6P+vQ58TXlSpycNHbc/RD/gnp8ILH4AftBaV4R8ceILvWvirqmmPN4u23hFp4etdoaKzYAYknOQW/ufd65r9X4LyzCZbUqc0r1XH3l2MMDRrVMHVrbQtt31Prr4oNoltqP2/X75G82UC2tUYEk5756Gv1zL5VJUlGnH5n0GWyqQoqMI7bsj0C50+NcQokiICbiNec+27PQVtXVRySudVeNSpHffY8i+L/jvxjqvjO1tfBGnpFb2Eq3D3UkvCoG5jjIwWcjtXu4XD0aeGfPq2jVYWlTw6U3dvc9T+JfjTwfo/hu2OsTbLm8tkl1PT9QhISQkdTyx3YHTHpzXzeCw2Ir1pJx9xXSaPlquHqV5yTV4rZo8/wDix8N/gjf6bba7outTWcF5AjSWEcp2cjHAbgUKliYNwqLQ8OtQxNNe8vmfNHxe/Y28Kz3Nz4x8EeLJLC4nDBpLSfDynGBlV+91rgxWBo1k3bbXT/gasmNao4KNtDwbxj8Iv2mfBF3D/wAI541muAYmjj8xMsFIOc9NuRkf5FedLJ5022p6Ee2U3Zo4PWfiN+2x4chl0rTtRjtoAdm+RGLKMEHBPbn8c1hPAYuyake3CUFHoan7O/8AwUR/b2/ZO16fVtG8HaXrcF9GYdY0lneNdSQsMlyOQ4XO1hgjPes
MVlmIxdLkqfetH8mtUcWMw8cwav02PrS7/bg+AH7UGnw3HjPT7zwdqs5XfpOrIrpEzKQQJV4YbsYPBGa87EYCtTp8qRFPLnCnbc8C+M3wX+DHiMPfab4w0q5jnt55IjHcJlgg3MMZyCFINfHZhRxEXawo5c5Jtx0R4DefAP4TaJd3T6h4ssljR8JiQEncgkXp6rXmUcNiKr0iy6WXUprRnP6z8U/gT8IJE/sfQbnXtQABjjVPLhBIyCzdSM16MMixeIXNN8qPRo5fQoO7R5F8TfjR8TvjLeRi926fZRyFrfTbNNkcQOc7QOM9yx5Ne/gsJSwFpQfvLr1+RjVp03eMVZPp66swtG8HQQlpNVnESJ94sMbffkc11KpKbaW5tQw6ptNlX4mfGvRvCGnt4S8GOkt5OoWTC/8AoRHYHt3rqhh9eaocOZZjTo/u463PMTcXFzexy3ku64lk3ySHue9dEZJz0R5NGM6lRW1PQPEsH2rwTpyGPKpcja3v3/pXpUm022j3qiiqS7nsPw50S0uPBQ0d02iQLJhlx+NflfHuJbqQSN8OnKNmWk8EpFMCpAAPFfncqzkjd0EldGtbeHHSIEEe/NY6J3IjTk9yzH4eLDt0rCU9Tf2dhjaCFc4I4z3qo3krFOkuUlXRV28tg1pZIxcLCHSU3bQR+dCilqXGkmrlmLR4/KP0rGTtIuMbMy7CxC6oUDD73NdNNc0TlqRvUujam01GblueMVnN2OiKcUV5NEi3A7unrSTbGkmIdKjAzxT5SLO4w6ZGOeOadrDlBrUrXOmxZ4bjuKFcIpSITZxrxt5quXqRONiK40q2uF5ORUXlFk8qaMyfwxYrcb3xx04reNSclY5quHg1cc1jbRjYgH5VXKuph7N9Dsvgn4c1zWPGdrbaMSi+cvmyF9oUZ9e1b4apL2qUSZ0ouD5j9qv2NNNvz4L0/RdN1eOWOFV3QWThsnHJZu596/QsDJeyTufI4xqlKyPs7wzcPa+FY7f5lYcKsnXP1roqSe5xQSnJNhp17dTuwHHJyT6UoNtHROMYnJfFb4R6B8RbJ42hEEqrjz4mwc/UVjWhfbcznTvqtz5b+Kn7K/xQ8OpJL4c8R3EiyElYd27P1z7VwVKdSD0ZknJK1SKPBfG2nftFeBo5FeHz2jGUQwHAH17GslPFQ21Omk6Tdle5434m+PvxOsp2t/Emq31oWBMnkwZC/jWMsTWjK8z1vcSSe5lwftBtaahbSwfFHVIJ42Db1vRFtPUEn09qqGOmtbmroa7Xueky/tc6T4xtpLTWdfi1O5sLfM93Bcg7gByzkcV0/wBoOUVpuJ4enZq55t8W9S8K+I7qWOS7QmPaWhkmXbGzYwM+veuKrmEVKxgsts3Jni3i7xX8N/AymXVL2KWXABt4W3FWI/i9ulZ80pu7NJxpUVdLU8v8bfEmXxPfC5uWD2K/8e4iAKRj3WuarTmzL2j6nL3+twMm2xVOTgmLgEV24SlazZLbaucZ4jM95evJOc46Zryc1q3xHKj1MDSU4XMuSwUKSSPyrgpt7nrOEYxsMOnKsWNoB9aKlRnI4JtmdcWQDYHrU+1cUYOlzGyIFEakdaptuoelKym0y5YKSQv5GqlFJXM4wvIvMwVQSPrWd4pilaDuWtNCseDkE0ptNHRTXMrs0EGG6Vhy3M56MnjBGD1FP2asdMV7tyRuBkj061UEkyHoS2yCRsDv6UTbSsNNJHpnwI8cWXg7xXo+lvoFrIt9frDdSW5nhnnViMRyTQxSSpHnGREAxHFenluLrK1OML/mcWNklRer26H60fsBaH+094x0K8+IXxvu4/Bnhqy2x+HPCOlKbeNUPd8jfIzZ5J59SSTX2OBhiGm6rsux8PjvefLDXzZ95fDmyv73Q0leeQRuv3p2yW/D0rsk4vSJ5riqesi3qfw607VZjNJqUiMGDDYgAB9aj6u5bMJYvlVkg+I1ppV/4Sk0TW5Ml48Rz7ejDofatG/ZLUeGlLnclsfM3iiGXQLt7Ka1DuIyiSDJEi56U5O8DpW90YWm3E+m3e+OfZcyj55yThF7YrjaSd0a8kqjVzq/hR8Tta8Iay+rvqaw2ifKySsT9o+ua1p1Ixd2aPDQqrU1vjrongf416XJqumxCO8eHdcWhQFsY4Ycc061SNuVIzlhnCNkrn51/tSfsmWqX01/Y6UssY+aaMRj5l3cjjvXmVLRHSg7aq58v+Mv2aLOfxQ0dpbxpYW8fmoZosMgb1PrXK6q2NfZTb0MST4CXF9rqvBpkg+VRGqRYyR3/SqdVRg7s7aWGlUlax6/4B/Z18P+DbU+JfiJcJaxJ86RsgE0ueRtXvz36V42KxSndJnu4fL4UkpyRd8T/Ejwrc3KoNyQxriCGNRhFH9a81VJVND0qdVU9EjHHxW8MxjybPSrxlz8xENWqcrbm/tEMk+Knh5n2TloBnjzVwB+NVGnO9jRVI23K958RNBjtpb6S4URopLMDw1d1GjOTRnKtCO7POLbV7nxZr0/ia+BXzWAhjP8CA8V6+HgqZ5cZOrV5uh0OkX5b7R5bAKF2klulejOXLA64x7HkXxH1G7i1aaG7tirhyVnRd6uPpXzWLm029zqhDlszjW16W1mN08Y3AHa2MfpXnSfPK5rCooMw/iJeXXxCEcV5ysYGPwohipYe9nuedjMJHGzTZyk/gu5toswXDAj0NbLHOeljCeE9hH3TW8EaLeyyeXJOTg4OTXPXlF6tEYdVZSs2dsvg98gGXHHrXBKrFbI9SGHne4S+BpGXPn4rL293sdLpXRmy+D72CUulyTjoK6PrF42OeeGlT1RZs3utPfbOeM96zdJT1TIjWmtGa0MqzxYJyPap5LG8Jq5Q1CxaB/OhPXuKcddGKtTuuaJNY36uoRmGapU9bmdGpJbmjbleOM+lKSR0crlqfSP/BPfw1oN74w1Xxro6w6j490q1YeBNIupPJtrW78t3OpzyupjCW6rlUcjdIy+mK+s4XwNOdKriotOpHSKb7/a+QqmBdenzuaUVv3fkl5n2H/wTo0HR/BfxW8Q6hffFtfGHim53Tavd2582CB3G5x5xx5jFs8jqDX6LwZleGpyqylW56stZf8ADnbGSrUXSUbRsvzPoDX/AAd4o8UXtxfxfIs0+BfTgr5K9yuBwOvJ7mv1vD4qjg0oxld9tD35YnC0KEYdUtl1LvhPV/DU+qzeCNEjmMcER+03UZGLo45wSefc0q9PEKH1ie76djzsRKvGHtpfLyOb8T6B4d06++03VoUS1mEwmLLtt/fp8zflivTo1qlSmkuv4mvNVlBO+5pfBrXNO/aFsdc8TaHYacul6VMtqur3ESm4u7heCc87VX+7xk9RXkZlP+ycVTpXblJXstkebj50cvcISu5S6LZI5H42eGvhpFr1toqiTXvE19GYtJ0KymJXZnmaXHG7pz0GOK6sJPE14OpUXLTju3+hMKNXE0W5x5YLqzxH4w/sp/EPwei/2T8a77RtUkZAbCzKy29vnsd4Jz68jgU6eHo46LqUZNHi1MuhXd6ex8xfErW/23PBur
S+F/DXizSPFpVGWSSK0ZGBwTyykgV59XL8zhJey944KuBnQVo6nzl8Rv2j/wBqjR7r7d4m8AaVK64S5eO4bc4U9NxXnk8D3rz6lbG0Ye/BGMVilLmjG7PH/En7aPxvgkkl1vwfHareT8CKQiW4YDgdjt9/SvKxOYYulFe4d31vE0oa09WZcv7YfiDXmbUvHo/syONtxggUuwQLhF5YEsxyT2AxXHTzWc9KqsP67Tp0+at7pyOofGbxFqAgfSfEF6iRYkLzb0XLLl844wc498DiufF1sG56tdxVMYqkP3bdjL1L46azDJd6bdajOt2giISTdlSqgDIPbFeVHH4OpL91qjyP7SarOC3XQh0n9oTUEjNrf2C3IIAVXj3Z+mf5V1wxFCvE9WnnKaSaHXPx8kkhEOlaWimPkJ5YXaR7Vyv2d7RWh59XN71W4LU53xB8X/GusqVS4aONydwU/MBXdhpRjryhXzPE1KWisZelPGf9Ku41kaQ8yvyc+9XUquRw0ubEz95G27SXRgeYoCpzHz1H1FXQjeR7MYxppHqltai88E29xNuPkTqWxkgjPpXZVqRp3d9js0qQPY/hxPZ3/heC+06WOS3xtV1Pzhh1DDtX5NxvWp1ZU+Vnbh3FrQ2yhLfL+dfn91E6HPoTRodvX6g1LlzArWuOVvX8DQ1ZFuSsJnc2MVCkjNTfMK/yrk8VpdjqbkaDLbgOQKpN21CCZYDtsOScYqbKTNZbGTYf8hQnuGFdELRjZHDTbdbU2Z3CtjPH1rKSV7ndU+EryThnCk8HvTWhzwbvoBIxjcePeplI0krK41zgHtx0p30KlrAz5JR5vXv0NOLuc0bpjZ22jOO1NtGlX4SETqcAeg5zQ4pmNNu5FcjdkZ/HPWnTsmXV+EqpaXN1dJbWcDSyyMFjjVcliegrSS5lY89zaloer/Cr4Ba7b+O7HTvihdz6FBI6M0LTeW0gODjg08PTTrpSY8RTnGk31P2S/ZT8N+F/h78KLR/AUE08oiAiXzcgnHViDz+NfpODhGlh0kj4rFycp+8fVXgP7fJ4Ct5dS2/aXGZdhzg+laSg47s46MZKprsX4p/IsG2tiQthmP06UlLlOyUE6g+CXAIlUKiruOf4j61XxImaKFwbS/8AM1C9iUomVhiK4z71laz7mUotNI8n+IvgrRdf1FtKj0+ExRwl5ZmBOe+PehyTeiNINQjex83/ABS/Zz8J3sTy32hI1zcKfslqkYAYD+I+g+tcdWkqj1RTnKbuj5E/aR/ZZ8PaTNBJFZh5Lw4htIRksO59cCuWWFUeh1Uq85NI+cfFnwAtWjuLrw2Johbz+TcqhKkP74qYQgjaT0uefa54b8QadBJbyavchi21/wB6xO4HI3c++ayrUIN3COIfLa5yV/pV1eTul7IzTp1aRs71pxcUjOpKUmRWltLaMy25/dMcOhP3TWcveYQi3uQ7QrFVG35u1dVFcqLm0tEc9qDPLcuc/wAVfOY93xLZ72BcY0UVdp3EHjHauZtRR01JXY2ZmEZIFZJ8z1CEboyLonBy3PrVNXdjCrJQ1NnBCjmt9Oc0xDaqNFmxcryOM4pVfhNackWpo5ioUHIrlTSFOKeqLulr5SAMaGnKWhdGa2Lxk2vn+dVsya2jJbebJ5ok7RN4P3SQyEkg+vWpgnuZ6tk8JdO9U5RtqP2dz0n9nLVvBmj+L49X8ZePfG+jtHcIunWXgLSVmvdQl6+X5zsqwDA+98x5+7xXq5VUw1NOc216HJi6cuSyjc/T39hL4gav4xia4k1e6hstPiA0+y1DWVvWslYg5kf/AJb3ZJy5PC5wAACK+rwVf65L3W9D4/Ma8cNC0lY/Qz4LappWlxroN/r0X225XKW812HnkIHJI7fTFdseSFSze54teVSpC+rR38uUOK6lK0jkgk1qUtd0O18RWh0+4bYCPvYzipqwdSOh0UaipM8Y+Lf7OHja8t5bzw7qscyYyFztI/SuGUp0t0dtGtSk9T558YaN8V/Az+VqOhQXAjfKnztpb6+tc7qTknY74yptJHnuq/GXWIZbiHxfo1zp0UQYxyPEzgnoMbeBj34rlqVKi3OuEIdGaPgj4+aWyRXun6+0VzBEPKj89WaaQMCXfPTjt0p+0vG99TePLN8rWhP4y8Y6Z4tmZtVQsJrvfMRwo46AjrXBVxElKxSw1No8i8T+GfAghjkNo7vK8gnBAChAcIo/OvPrY5paI6aWDg3qcdr+saZ4feYaDo8EO9/3UtwoJVWG0Afqfqa43XqTv2PRo4eEXscD40S/8WXUk2pas7zK2YVJ3gqM8H0rBpp3Ouo7wscvJ4ctbWHzbqIAx8ATOB3/AJU6d29CYQcVdHJ+K/H/AIF8MMY7/X4HdVP7mA7uffArsjTk9zCWJhGVmcFrXxXXW1ddJtAIGOFlmHH5V0Rpaoj2/MmkZcUmoahIqz38jQqQWt84T64716MLJWRhN8zO30CLy4cL2HY9q76a5TppuPLoaPh9yljdTMhbO7MYbBIq6tROFjppq8tDy3x9fR3mpSCyuCDzkAAMPqD1r5vEtc53taWsefa7KsUohll+bdgbhzXA+W5w1XyVLMhgxEuSa5KiudVNXV2MulBjYjgEcU4e6Y4jZoPBTkXzjPGfSt6qi4XZhg4xc2egRozAPnjAyMV5kknseyrJFnau3aR+NY21I5kVpbUM/PHcVtFpIJO6sZ2p6R9oTIHI74raNTkMXRjJXMWb7bpcuBkr9KG1J3RyShOm7svWOox3ibJGByO9JSdzeNaLVitqFlJay/aIM7T6VspqS1ZnUhy+8i9pGoxSgRyt83ua55xbdyoYi7se+fsm/sifE/8AaFvLzxraeJU8G+CtGjI17xrqybbMDjMCcgyyEZwi55xnANfZcKcLTzepKtUqSpxitLJNSd0mpO6skru6vqkmrO6iU6jr2grvt1PuD9hLV/2ZvDvxtg+CPwOF9eTWOntcX2sXsrLJfhcfvCq/Kinj5SemK/XsqjlOC/2XCu87atf5n0kKapYOdRJKTVmfTXxY1/V30+40KK7aCC5lAkaHBYL/ACFfcZXhaPtI1ZK7ReEo0IRVVxvJI4/9ne70rxF4m8R3cUjXFhpEC2jsQVQsclkT168txkk1257FuhGlHRz/AK3M8yrKNKCjfml+Bwv7WXizxDNpr+GfCkJFzq7eRpdiC3zuc4yAP17CvUyijSpYdzqSV0nq/Tb5nbgqPJRU6mpc8C+EtQ+DPw0s/g94bvorGOOJ73xFfRhjHDI+Wcgkku2TtVeSc5PQ1596dbEe3mrz2RNX2FWo67jdvRI6X4V6d4L+DdjqXju8Rr7xJqroReXyBpI4xnZGD/BjrjtzU46licfUjSWkFvY4MYsXj0qd7QXQ8A8e+MPiD+1z+0SnwC+GuqPbW0JNz4t18crY2xJzgngSP0Ge3Ndsp0cowqTWvREypLB0vdex1Xxs8BfCD4CeBLv7Vdw6fpCQi3iuZ3/f3LkhWYZI3O5PLEgKKqhUniKXM/n5GdWjGdLmnufLS/Dj4V/tReOfFk/gSP7R4
9a+R6Na35dfJVhnILEr+lNx1I92WqNuK4s9PsPtMqhTiuqHLSp3Z51T2lStyROe1Sa71e8jkhni3K24M54j9/c1yVG6s+ZnoxpRo0mrf8Ey7S7tbbXJ5DcB1VSbm9l4MhHbPYewrOMouQ17tK1vkcTezTeL9cuNcvrVf7KsTuLhsCZs8getZOCnU53sdMIVI0o3tzaX/U5H4ieOvDMVwV+zFbUcmKInCHjhj2HHapqyi1psbKFSR5RYeL/DepfGSw0jQH895YWWS4Ns0MOzocMwGTXFGvSjXiox9ToeHksO5zKnijXde1HxHdfD7wl4ru9MtJFkhtYdIjRo3lyT+9bB6gYB45qq3PKbUXZGHs41LVLanCeHf2bfEfwT8FXXxW8HXup/2RDfs+s6Bc3DMPOY/vJVLnK5646cdKKGGVD34/M6nKlP3Z7l268F6D48DeKvCusSiTYpt42Ynyw3pj7w6j15r0ZRjKPNEyVRxlyyRj65rHiLQrJtOvtHu50AMcwllMOxhzvj3fe9emK4qs52s2U6UG7nAeJbbV/iTocFsuvXa3UVu/lNeOAMew6H6VnJK24KNpaI+c9Su9a8D3F3Y6zZW8gQMs1xFGTE3X76AfL9a5HUs3c6ormWh5n4vkiur6S5tkhRWOf3J+R+K2pNc2h20+aOjFs0jn8FavAQrg2oOMd8967nKPsz0IU+azZ4xrlxremXTRreTwKw5SKf5SPbBr5rF+4+aL1CT1sjCmkZst1z15rkj3YuTlQRO8XXvUTTlsEbSJFl3Hd69KycZJBJNEgdun9KlRe5PtLbAny3AJ79hXRHRakPmqSN6wnkaBQTwBWNRxW51RhZGjbo7YUn8qxlOKdjRKKNLSwbW5S4HVDmnTquFRSRpFpfCekeDNZOqLHbpetGx6ljXrRxKrRu3qXGavqfS3wG+Fs+ueGYNZvLBL+PTtUilKOPklUHlS3UAjivqMo4anmOXxxUoqXLJOz8j0sJgPrmGlz6du56Vo+nP4d+K0uv8AhzwfcabaSsXjtX+eLYfvIc19BVyKMM8XJh52lG6drwWys+zd9Doo4GrTwlr3e3meia38IPA2sQQeL/hzAND1VwWuEjh+fee6n0r3J8NUKUPcn7F7t6aelzzqOWzhXarxTj6lyy+GvxA8MzRt4n169heQearzjy2/3vU1ll2EyyVZ06WKdZ/Lf5Hrwhls43oJP01En03SLkvG920txI/MzMSWP19a+npYGrHDx5tHF9OxvBy5bWsjzz4mfEf4ceBPEl1+z9brqiajr8Bmu/FkYeO0v9hDy6VHOeCyDa7pkFhxyBXwuGzCHE3GFShj6nuUpXUeazbWt2uup50aftMXF4m6drxi7feeH/EbxCbPRYSl7bP9rvpLiSJBxFbxkmOP8Tj8u1ftawsZU04vS+q8kv8AM9GhQlKb51sed/Dq90WC0n1C/s5Zr27mkuJkEfBfjbnPUDg4r2cHShDBq27PUpUpUo+6jnvinrgjw9pcqrWcUknnZwZnGSSe3HQfQV0WSTZjUlKejPNr670y9tb+e3tJlOqQRRXkso3PIMfdHTcT+gNZ0KT5ua4lNRl7qOF+JAii1K4fSLJbeRr0LYW5cM8shGwE/wB4IOSQPWsMRVkna+pnWm5tIoa54a06y0zTtMtbnz3MTPPLsLLKT/rJfQkdj712wSVBQW5VanGFGxz3iR309rLXJYo4THAzxJHFkosRDRkjsDg+pJNGHw3t67oS2lGSfzWh49ScnL2SbV9bn2z+1d4Gbwt+yJ4r1PwF8MPh/pNn8Z/h0PE2r3WnI/8Aak89pJHJGzbiVCOHlfagUBgpOScj/OfLpynx9TeKxFWc8vxMqME7ez5ZN/itEr367dfKw2V4bE4TG4lRcZxvbs9NdO90fmp4f05NV8FNq1zE7K8G1UOcqD/Fx74r+78PB1MFKpJbjy+UquXKpNboxdK0p76/gC3Iij8wr9o6lJMcE+3FLDUktTlgm5czO20+C21DwncWMKkyxEmW0H3zKOroPwBIrodR1INI9aFX2tJxiYmiywadaXVvMsTSSQEqZGz5gJGcehHpUYWlCMHcyw0JUaVplqXTbW4sLnUIBEwlt0kLq2WGMguR254I/GitVi0+XYK/v07oxNJs7Q39yEdYpo4flSIj95xwV7H6ZrwMTGPM+54+G5I1Zcu5yEGnzrfzfaZXMiynOCMqc9civLw9WEU11PH5pOrJTfU6Tw1YySX6pOxeQAEMP4x2rrbcoaHsYZKDTZ9QeBEg0rwDbRDCqzEjBr8q8QObmpR+Z11a8px5Ue5fD79kPTLTRLb4lftZ/GLT/hX4Zuoln0+zvrN7vX9XiPO6005PnCntLMUTnI3Cvx/E5hHn9nh4ucu61S9WeRicb7D3aUeaX4Hq/wCzZ4b/AGNPil421jwx8EfhR4nkisLJGXWfHeqQXFxfcnc4toYglspHYMx969DLZZhQqN10tVojvwVOtiIylWs7fgbnxY/Y7+Dniq0mfSdGj02+wQr242jP0r3OSNaOqCry2tE+O/jB8HfE3wh1drbUoWktS2IrhRwR7159bDSg7x2OSEpOVpHHxOswBY5HauWUlHRHUkkiHWtIu9ZsjpmmwPJNN8qJGuSSaVDmnWSObFytRaZ9P/sB/sh/H7+zzp2raSbbSrwZkEikFlPqK+wwuCqp+9sz5761GlBxPtD4X/8ABML4LeHtVXxLrPh9bm7kbc29MqDXbDBYSnK6Wpwyq1Zn0J4a+D3hDw1BHZaPocFskYABjiHSulcnQSi27M6ax8OxxSeVEhHocdKWiK5EXl8KBn2zjhupobvoVH3WW/8AhBooVD7CVI604pLc2EtfCsBl4XHsw60pW6EpkyeE4GkwYwvbBqtOUFvZkVz4LspCXMPzDjg9acZJoFK7tYrf8ILBaHz3URovzFpTtGPqa561alCdmylh51JI8/8AF2oWi6zcwaZOkixIcSqMjP1rj9s5y93Y6p4KcKEnGylbS+1z5n/br+Jes6Tq/gvwVFY3y6Rca7bP4imtbR3MlsSSUBUY5wFxnPzV4md5k05YSEXzct792foHDPCtF5as1xFRK8lFLe2j1foe+u0/xCksfHN94bPhvwjo/lt4f8PwqImkCR48yYA8564/Ovm3w/iM2wcpYl8mlor/ADYUMwpZROWHpz9o53Up/wCS8j0r4IeOdK+PVnr/AMTtL8F2WlXNqxsFe1Oz7SsJxub1615eSOvlOdVvaxjG0Uvddk13s+pwZnQoYPD0MNGtKrCevvbxbNfTluXujbh1jQ8uA2S1fq0HGpFTT0Z8pVw3sJOLWxdvri6063ZZkCQq3EQIJI966L6HEtxlvKk98JVKLGI8F/WnexT0WpU1u7tpIvs8MW0hQVUKDkDrWTlKUirpQtE53xJbaD/ZrahJpC+bOoRSRnJz3qJxp321ElUat0OC8Ua3ai/W4s4ljFvAYjOr/KxNJXvoiFHlOKmmvdEWaWeZpY0ciJCMrh1wTVRcqY6k3I8/1DTpNP0/UFhupUEkonhCjA68fhmk+Z
7swau1c5TXdLuvERk1FrJknhgJjkBzkH+KtaS5mXy6HD2+pXXmz2MilHjlw+88Mf8A69a3adhySSPI/jDrUdu11p0EBjldjj6DqKqybuzmUXUZ84fEnX7fR9KudTncDyUYypIevWpupPXoRUlGktNzx3wVZ6zqNle65qyuYtVOZEI+4gPyEfSvFxeJ563u7Hs5bhJey56nUtXelzWe1JY2UFd0bspw49R61ytShq+p3VGmtCrJGUUgdPeuapO7IkouJS8shyVPU03P3bE0VZkV3uRCDWSs2Z4l3TOisYYxEgAHSnVcnN2OmVoyaZdiji+bcgxjpis1GT3Jck3ZEsESO33Bj1rVtQRUYpas6jQNGjurEGCwaeaS4WKNEGcZBP8ASuVynORcqsYLU0/DHgHxJ4kvLSztdHkVLy6MMcxQ4j+YKSw9BkVcMNVqz5UjmliYwpuT2Ppj4df8E/vFHirX20DWNEgWG10wRQahZhlM0zcq7epBOK9fD5JOUvePOrZlGEU4n0R+zH/wS01vwZqll4w8cwWl/qdjMzW8kkA2uhAG1l5BIxwfevTwuT08O7y1Zy4jHus7JaM9++GX/BPfwvoOmap4cv8ASFNpq+o/bZYCvyFwcgmu6OEpxul1OedepdM910H4AeEvDmn2mh3GkQtCEUQ28qfLwKuaUfdZEZSbumbt38DdI13SW0x4UtJFB+yzwIuYiOhGRzULDxqqz0NoVnSndnqXw+0m98N+ErfR7/V5LqSCLBuHG0yNjHQcDiu+K5YctzmruNSfMtDQ0+9FvJ9mZ0VRy3zc/iazT7GVNN6Gpf6vZyWYkW5BjT7zMeB9PU1nVq8y8jSjQcKjutSnqJa00RtQnSQQMSVjjX55T2ArCrJxhzNWRrFxdTkbu/yOMu9D+IPjfybi+ik07SLds/ZGUb5APU55zXOvaPllH5pr/h7/AHGiWGo3V7yIPiVrem6DpMejwWoht4IwdscRyD3J9TWs5qNKzQUXed5M82stNsvENpf67JatHp6EyZuCUa4k7Zz90duPc1zRqTnFytojarKNOo+R3u/69PQ8c/aC8W+D9F0u98Y6tYNFa6VEZ2trZyu5VGSeOQD0A71zV6kYL2rWiKpuT92+55P+z/4T+Lnxp1Ob48aZ4rufDT3tsj6HoltGFWK3ByGkQ53OfWnSofW260JNLTTY6q1TD0YKK17n0F4rtXn+EWox6/4r1S5u57Z/7ajtgZEnjxhwycEHnqK6varD0Wk2zzYc0q/5HjHwK03wfD8KbW+8C+P7tbdwyWFmzsJ0wxGcSDkZHTrxWFKrCtStGVmddec1Vs4mPpPhnxn4s8S3Evjv4i3GoacspS3eSAKYSOOcckiqpU5xlec9C3zRV7WG/FbwMfh34UXxLHqNtqFrGTIqWtwHeJ+zY4OO+DW1WLjG8dUCrRlKyPlbxv4wt/GGuT6ylpFbSyIUla2GFbPQsD0z3Hqa4OWLbuddKLjoeT+KLXyNVc29uIdxIkiQYUkdTjsfWtlT5VZHoU2oq0ixo10X8O6jEsoDGxPTvzW7/h3Z0Nya0PGPFAsZbktDFPHLn5xIMLn1FfO4xwuXCy3MjYFGSa4YzuFST5QLKON3Wqc7GUbkkQUHPHvmoc7lSTHFg3yjpReyHGGt2SQrGkoYtnA55rPn5inaJv6Ja3mobYbK1eVz/DGhJ/SofvaIFPQ7LRPhR8RdUQNZeD9QcHoRbN/hTjhcTVdoU2/kVFt6pHpHw1/ZF+Iniy8SXW9OksrYDLlxhsV9Tk3B+ZZlVUqq5Y+Z2UcLVqu+x9FeA/2IvCkD26abHc3cjAK7SIQu761+mYTgjJMLFSqrmPTo4GFFOVRn1b8EP2X38K+HLnTLTUdNa8+RYtCe62yXRP8Ad45I4q8ZxNw5w/ReHkrU1ukVic8weEceaEuT+ZLRep1mmal4D8En7JrukR6nfgc2UkWFgwT8ue+OnvXymL4l4q4oxH1Xh9KhQtrUqJ3/AO3djhqYrF468cPeMX1K3iL4p33iRDFpHh2x06NMAx2duAwA9/Wva4f4DhhVKpmONqYmct1Jvl87K+hvg8pjR96tUcmzn9Zt/EPi++jm1jVbu+ZlCp5znA9gT/KvtsFlWUZNTtRgoXZ7uGp0MLTtCCiiGz8IXOnGRmeF/KkBdFZcoPfNevCvVVGVO/ut3+7+mKpVp1HZaM8a+L/hbQ/Hvwj8X/sv+LtYa0ebxnNrmk+IcbbrSbl4VltZ4XB5TeCjKR91mr8Sznw94gqcYQz3I6q5ub34z0VrdH5u255eMwE8VivrEG+ZRSWvb1Pn608Oa74j0iyufiJBFa67a2XlatbQSBoXccGVD3Rsbh6A1/Q2SPE4jAwli4ctW2qvdH0OCqOnhoqovf6mfq1/p1vpcNzHBFEgLJFDE43zEdWPcA+tfQxXu2RdSt+8cTxn48eIxaxST2dhFJ54dbW0TOJHx29QPWsJ88YW3ZyYm3LdbmObS60HwTBpl0pN8LHzwY15jldckZ9MCtIQcad5ble9CjZ7nn7W+pX+pf8ACW3kCQz29jusklbCxQ87nz3c4OB1rjdObqc7Oe0k+dso2GoWhu5td1pflSJNsDHDfZiDtXgdGPftXo2koczNKVRufNUenY5rVLbV55Lm7FyEtzpRjX5AVTduIVv6VnTxFSFVTWnQc6LjJTWx3nxm/aM0TRfAHgTwzqHjuLX/ABH4m8J6ZodjpMbOW0ezE0sM28DG3fywwT1z2r+U8z4Soy4yzCUqPsaUKrq8z055tRd0fn2NzbF4XiP6olaFTkt89Hoj52a4hsdNbQdMu2aCO8MLHeQdiEgH9BX9H5fWp1MBTS7I+gpSkqahBe6m19xVGjBLq7ltZAIJSFulV/m9VYfQ1qqSUmo7Mn6o+ZtbPc0fBvhLVNC1F7e1uTM8q70zKQJD6oeMHFcyoSozdupeDorBtqLvc6C807TmZtNubdRNGytMk0eOD0bOPv8AUY712ulGVPU7pyU42sYt/JpyNNoFvbGDaSqlnBaI4yzLjqPXNeVXppRaicV1L3F0OVjuNN0m/wDtWoytDbxqcPu53fSvFnGMJXnokeFO2HrtydonO6fGWvprmJ9rNKW2Oedue4PevNo06U4OpB3TZ4sKVqjlfdnWeFGSbUIzCSQXwHz931roVVQStqe/gqUn8R9QfD/XfEHg+003UfD91FbXEFpujuDbpI8LsTh03AhWA6NjI6gg81+M+ItaWKzGFFbJanbiFaaSJNd1TVvEGqT67rurXV/f3D7rm+vbhpZpW9WdySx+pr4OjCGHjaCsjhlTpxfMehfsifG+6+BPxaj1xW3W2pQm0ugx6Ang/nXXR9+qpSMfrE6Eny7M+7/BUEHj5DqET5Wf5lIPrXsRcWrIV5PVFb4h/staR8TdCuNH1G1WQtGQhYcg1TceRxCcrI+O9Q/4J3/GM/Es+DvD+nlrSST5Llxwi5rxPqFarWtHY5quMVGGu59u/sn/APBLbwL8MYIPEHjO3Go6mwBLTICEPsK+qwOWUcLFNq7PAr4utXlrsfW3h
74c6LoEEdtY6ekaIBwigD6V6bq8uiOdQ5tzo7bQ1yFWMKM1n8UrlN8hcPh4QurJEDkd61toNSVy/Z6DbGPfFGQw68Vk4sHNSWg6bTMOSUz2HHNXzRSEkr67ktvaOf8AR5SQnsKnm6lSY86DlvMSXhfuseKHLQUW0VdYm0awRZLrX7GJycMHuFBP4ZpwvI2jCpPocl8SfijZeEYPsPhgQXt28YJlUbkX8qyrXpp23OqFFxabPHPEfjLxf4mulm1vUrm4DZzDkrGv4CvNmmveep6VKnCO25DpKTRTjbEDHICCDmoi5xltozblclcl134aaJ8V/HPhiy8Tndp/hdpNTu4PKG2Q4KoGPruPHtmtMRRjXqQ/u6s78Nj5YPKK1Pmd5tKK6eb8jY+NGtvceFbp7WMR24hYxqBhVUKcCtWqNSPPB6NXPNpqcY2e5b/4J0z2kn7N76j4sj+yNqE94+mSIv8ArlM5C7gB1Yd6/P8APMso0pV8VVbTlG0Wtfe0smuh6mIrYiv9XjSV3H4l5a6nq3i3wvfWUhntF+x3Cou6LHEpxnIPb6V3ZRncsLJYbGK2isc1SlHHU7wd99f0Zyyx6gLtYdQuZpWCkuHUhfxNfaU5pxvF3R4c6Psb33HXmtXV7cbbeJLe2hjIkmTv/sgVtCV9zJqMjK1zxINFijSSSUyiMskJjJYj1PpSnPlHycu5z+peJzqFxDYx3G9Jk86ZZDgYpJWehN3Z2OY8ZJbR2bxXTxwOF3wRx8KVyCSw9fSrjNO5fJy6s5/xf4rsl0m4lm8pFNuNxbjaAMDt61NS81oZ3d/I4jV9ehstBY/ZAxWwC4BzgkZFOMdLMzSblaxxehR+I7nQJbmWfZO0LfKjfwg5H/6q1p+4XOCi7I808WX9/b3N5NKgWSSMSFF6g+vtVSkrmfs7K7PFfjT4nhnZ76bi4UAnI68daxdRtkVJqMbJanzB49lvPi341h8C6TG2GfzdUljUkRQg8lvTPSuXFYxYei316GWBwk8bjFT+81fiHYn4fQReHrTyy4iUqpT5WiYda8enOTjzyPq8wp1cKlSVrr8jzxpmAzI7FRnaGYkD6VEp1amj2OGKjF3W5BPKfLDA9uKfsl1JdT3iitx83zevpWkqK5TSDSGXb7ozXLazZjX+BnRwOIkCk1ry3dzZy9pK5ailZ2GRgd6iUlFFxilqzZ8MaRqPiXXLXw7olqZ7u7lEcMajJYk1zxjOtPlW5NWvGlG7PqL9m34B+O/DHxWs/DF/4Rna+t7iG42XNtlEIPIPHIINe1g8vqUcQuZXPLxGIhWpXTP0H+En7BPgo+LG8fzeG1tZ7gZawXPkqTgtgdByM19EsNRU+e2p5Uq9WcOR7H034E+CHhrw7dC307SoiQo3CTnBre66IyUbLU9AsPCFnaFoJLeMLjLj1qG+5SSvc2bHw6jZMdqojVeC69Pxp26hJssnQjMyxTRBzDkowXp9KHS5tSFdal620ksA7KE+bBJHWm4KJbvJ3ZYVJmDxxMuehfrTcrqxGmzMyHwzrepav9pOrbIIgdtiItokc93bqR7DFYS5mtDoTpU6e2p0Fp4b06C7S91y93+V/q7NOIgevI7/AI1MaajLmqP5HO8VVceWlHfr1IvFPjm0jQkxL8vywiMbiPcCscRX53tp0LpYZU1ruzibzxNqfibV4rbVdVlsNPhYFl25ZvqO1TTftJLmdkdbpQhSvFalPxnrvhaLUFUX5ljDgfZjGd0nua3nKkndO6MqdKpJe9oef/EvXNRii8q0to0CnzLezcDYo6/MO/0rirTlNcqR0KME9D5a/aLufEnjTVNN+GtpqV3HqvijV48wWFmqgQRkMxdhjYNo4wD1x715uIjaKpXak3pb+tDswsI87qvS2t/M+h/gb4V0nR9W0xLfRDbtboLIObgKQyjH3Txtr16K5GklsclaKmmbmp+IfDHhLxZrdr401uBZ/skxtUvtqwp8pzl1HTpzzWXNSUmp7GM4ycE6avY+T/gd468NfEnSPE9v4ciS+02y8TXUFleQja8XJIKleqhicVzYWEa1OVmdc7qa0szk7qy+NNlqN1oF74vtb+zNyZbVyhimRuylhwW7e9dKpzpxcW7o6lyTs7HOyaY9vqV2dT1C9t7uQfvrSYkoR3O3PI915HpXP7RJtdTWUEo2SPEfix4XtfDer3GoaQpQSoXCKpdH9QTnJHv1HcVzSlJSKoysrSPH9U1SGeRzEHBMmTukyAfStozkjrUW3qXvCMkV7pt1ZmMjfaupGehrupXqU7WOlbWRz938M/Dtzqi/8LM8bQeFrTYDFLFA15JKp7iNW6/lXm5hlslHnlJIPeascL4s0Lw1putTWng7xRLq9gp/c3lxYG2d/wDgBZsfnXzrp8srJ3G1yrUyxYTHnbxmtPZX6kO8dRxtZh+7Xv3pKk4gm5Mki0u727unNZVLs01SPXf2d/2WLn4opJ4w8baodN0C0bBZf9Zct/dT/GvXyvKHil7SppEqlh5V3d6I+l/A1r8MfhfZtpfw/wDClpb5+9dXMQklYY6biK+qpUcBhF+7gvVnoKhRhb3Tq/Dfi7xFq9xFaafL8pUgLEo49BxXr5dUxNatFUkuW2/5HpYWipOyjZH0H8APgX8V/ijLCdD8PPLEoAnu7rEUC/7zHj8q9yvntLLKagnz1ErW0XzZnmeZZflcWqs9ey1f3HpXiH4Z+LvArDQL+9sD5ecvplyrhcepzXh06ud8TOUZVfZQT+y9fwPPwuZUMe+aEXbzVjMstLU3n9oXiiTyVyJTcEuP8K9zDcKZdQoqNWPtH1ctT0qf1l81PlSh08++li3pvhHUvE0k2raWkphg+/I8BYMT6nvXuUo4TA0+RK1uiR0Sq4fCqMajSb6Gnqvwh16ztleG9sbd2gM08klwFG32B71vTzSgpqlyu712Maec4Pmsk3rbYy01a7awGj6PcgRxsWaYQjdu74OK9iOGpzaqTV+up6kI3n7Sf3GNqWgC2g/tOS6SVwjO6ySFcnH8eeMV6N6ThtpY1p1VKra1jxL4zajLrXxHSSylt7eG98PRM8lu29NysRyfxxXJl9Gsqk7aKWwqnLz2jc8b+KFppyBbhZJoDCDmWN8iQj+HHHBr6+m4cqezOqlGy5tzzPx3eWXiPTpNT8N2TRNYRCK6SaUBy+TuyP4R6CpjVcnuYSqOTet+1jxjw/4o1vxjr1/rPiXThbTWUos9LgkzsWPIDSDH8XXmlCU6lZt6JGWFU603OrpbYi8feJdRW8a2swJXgi22ioP+PhkBX8EA7+1b1pNU36GuIasmcheTSpoVi9xe+ZZqrm6df+WmSAVUdcZwM98VnRs0mzCMJpczd0c/42nvXtl0+GeOCWFI1u1SM7VTf8sY+oPT1p4mtNLlRhXklC61NK+s0aKayN2wlnhjS3kGDyQckjGAw7LzRCneDu2bc05wUdj6N/Zc8Q/sC/s6/scah+2D8cPhZpPiL4h6N4ivvDNgdXtvPZY5bYyWrJGflR1LORJ1GDX8i+NWC454i8SqeRYCo6WFnCE3NaP3
ZK+v3aHzeIwuChmbxWKlyqCVpLe99Efn/wCD4oLjT7vU7qRQrMZAgA5Dtyueelf0vk0IU8JGMpXcUl9ysb4CpB4b3dVds2rT7NPrR0e3tmhKwf6U2BluPvDIxx1r2lKKnodcZKUlFC6kZGaMWclxDLY2TOJlPB5+V+Oma5sTXitOprVoqKvExdU8f6r4phudPu7hROoXfdlQN5XuT1PpXEsZKtTcEzghjI1oTpx0a6nGPFNDdvfNdy7A5MoVuT/tAntXiV5Sg27s+eq06lGo5KT8z0P9jr9m7x9+1n+1h4L+CPw7k0651K/1QXTvrk6ra+TD+9kMgz84CqflHLdK/PePM5ocO8O1sbiHLlSe2+uiOWUKbxdKU/ejFpyT6q+2hmfHvTLbQf2jvHmjQ6vFqq23i29Q30Gn/ZVlImbLLD/yzXOQF9BVcEY2eN4XwtRQ5eaEXa/NbRdepVWCo5hVjbS9189STwnFbnUIWTCgyjfGOtfXKg3oejRq1Hsj6atrT/iQ2N0tuVDW4VWxgMBwcH2NfhvGVVSz2a7JI7J883dlaUfLkgg18qmpM5Kidys7SRSB42wykEEdjVpPdMy5Ln3N/wAE9fjbbeJ9GXw7q14q3dmQrLI3LD1r0cNUVuVGkpQVPzPsfw9YXeqXiy6dGCGPJA4r0adGc5XR51XEwpx1PUfBvw0t4ZV1S5tozNgfMV5r1qFBQdzw8VX9o7nZ2WhShiEiAVR2reV2Y3Rq6foc96CkEeNvJIHWoUGy7pK5P9kFqvlyx4ZehPenaxlJqQAySTIqjtyQKpSGl1POP21/jZa/s4/s8ah8Rb/w5qmoW8l7b2NxJpN4IJLJZ3Eazl8HYqsVBOO4rHFVJwoN01dm2E9+ukmk+h5P4c/b/wDiNZxW/h1PgjNdrBAsf29pjdzEgYBcb4wxPBzmuenOr7O9jtngVWfMpak3iX9qX9oDVoftllca1oMLPhlh8A2o49nkv3P47aVSdZK92vkZQwVpWkvx/wCAYB+JXxr8Sws99rvjPWcEBoorq0tck/7KwPj86xhUqT0V2ztoYSlGeyNPRY9ajuI38QeAvGdq+NxnupLm5Vf+/WmP+hralLEKVuU65x5VZWf9eo3x/wCM/EOhXCS6J4uhgiZSNt/4Z1cuPqwsEArSusSmvZzUX56fiTF0qlL+FO/lb/M4rTPjreXk8iTeJfDMhtwC4vY760BJ9DPbKDx2HPtXj1MRi4vWUGr23Oig8Pd80Kmn9256N4Rk+IXieC2m0X4RazrEcmGWfRVV4SOuVMhjyPcZBrqUsylFKULrpZr8NTmr5jlUE0qjT84tfodHrPip/CNubHWfg94/hu7iXddTDwpJMpVRhUJiY5A5+tdUcaqdNwlTd+v9XOWGKpVLNTTXTf8AyPK/jj8bfCV74avLK50fxlpqPaOolv8AwBqcMSZHVpGh2IBj7xOBnrWFb2OIlGTTTXTY7aNVOldPc9v/AGW49K1H9mXwXqmixItjPosM1ui9DGy5B/I5rnjCFai1NXTvob1qlSliG1o0dwPFF94UvbzU9Ss21TT7u1KtAzcxEDG4E9Pwr5XNMhlCpLEYZc11rFvy3XoawxGHxkYUW/ZTi7qSWj8mV9et9A1Cwh1fwdfm7gktlkkhZiXtyTjBHfniscrzb6hJUpNuFtU94s6p4WpjIyhiIqM7vll0kcNr9/eW6siIWkibcARtUHnrX3dHFU8RBSp6o+fq4SeGqclRHKatr15dYiktZ55LpT51xEcFAB90Z6VurSWpLgp7mElzDok9w9wXa4W14SQlljXsMjvSaUfeM5QS1PPr7XfEviXUZsRiaMTYWTBBwP7wPRR+tZ05O7uN6LU534h32q3sjaHp1rLdsrp9qaJcqq7h1NTUrpPlQqVF1G30F8XXkKWX2XywG8pGVFPLHgYIraMmS6aizA1fWV0TbZXMIjW5tyRsGAp961ulqY8jk7nkPxB1a3jmeR7lTIYSJPw6Coi1Udr2FPZW1PDbP4cfED9pj4w23wd+FdgLrVNQI8xmbEdtEPvTSH+FFBzn8K87NszwmT4V168rLou7OnBZVWzCpyrRdX2Mv4qafafsRS+If2eYvDIl1XVkI1zxRPErNeOpHyxHJKRLjp3r5TLMauJJrGQbUVpY++q5dh+E8P7OpDm9rG6nbf0PnLxt4wufEepG+u5dxWNUj3HOFAwBX0NSHRbHxuJxLrzczmnu0YfvZePTNOMNEjKGzbKl74k0mxjPnXIOe2a3p4WrWlaKOLEYmnSepFpmoW+q5ltAdvY0sTSlh/dZvg5qqudkk+FUoTXEo8zHiKis0jobL99iRx1FTUqcqsjrjaOh0vgrwR4r+IOuxeGfBmhz6hfSqTHb265bAGSfpWFKnOtPlgtSK1anSjeTPpb9jT/gn547+Nur2Xifwp4ou9OvtL1QJqkMlo0b25U8gEjk+4r38Bljupt2a3PFxWMVnFq5+x/wm/Z503TrWzu9ZsFutQt7dIpLt1HmPgYyTX0Emlojzop2PZvDnhKzguRbXCBY0Tg55H1qVqJux0+n6JYTwu9kgOzGCFGavlQ7suR6U93bsL6MhgcLIMYIqHqxuyehoWujz2FkUt5VaNhzg9aFexlKpCUrMjismLovmhj3Ut0raCbRpKSjG7G6/NqEVsun6Na7ru4ilFvKyZijdVyC5HQE4pVISlojD2mvoZq61bGIwRMrSo5T5ByGGQxrJ8nLZPU6I05pXkbGm3EdtALgFWG3OH6k5ojKEVdhVUnpF6GTql1qWr3E7aFeW6SKp2NMp2KfWuaq5VPgKpqEUlJXKWk3Vto5RtZuo570tmVVjyCB6VMXGK13NcRTcknHRGdf315rGoyRaPpkQeT5mYwDCD1yeppqcpPQUH7vvHN2miX+tXk2orEvlRSeUt20eGuH5yF9hjrWbkpu+lnojedox5VueWfFm81iPUX0tb4AQsWdtoJbHYtg/lXPUlKm7M0p0owXNI4T4Q/C3X/if4yvPjpqHiddGjs5PsehIqA7mBOXZWBzluOMcCuejD6zU9vKVrbGuJreyh7CMbo6jwD47s7jWdds/F+oWR13wxOI5rixB8mRX5ztP3WOfwrrhWhUlJPeJx1ZLlioJtM+b/jDrT/tPfEOfQ9PiurXTNAMlpdA3BA1GRsck8ZH4815/tVjJOK0SOuivY0td2b3w4+FUnwO1zUdMtYJrHTdW06GTZDDhYLleN3H4V1Yek6E2u50SUZUlJ7m5rNjFe3V/FrNnuExXdKj8iX+Fxn15z9a6ql5XRkqjWiPLNXufDV1NdeG/HsVzCYyUt78sBJbkdCD6Z7HmuCajB+8jW073Pmn9pS1134Y6y+n3ztdwXMfm2V6sm+G6TnDxnOA3rjHvXPUhVir9DqpU6Klzq9356aeXT9TxefVTewbykSF2374024z2IrWlTd9WdkItl/wTdG3v2VNpBBVgfcYNejSkoqx0xklJWOI8Z6ZcaRr1zCHjbLkmOQguo9vavCzCjU9o5N3QVUo1NepjySBlwT1ryYfFoUo8yuyNCc
5AA/Ct21bUjlUmTRqgJdhWE56WKaS0R1nwd+GXjT44fELT/hn8O9GN7qV/LgDOI4Yxy0srHhEUcljwAK0wuHniqqhA5qtaMEe3eFZfFGh6vdfClNSg1G00e7a2ivdNBaGZlOCyccjOcHvX3WApOdNYfp3PYw/NGCitT1nwd8DfFutaottqxmsoCAzG4gKuy+uD2r148OYmeM+O8F5WZ6uEwvtVzzWh9aeGfg38B/hp4D02z8Hpcaj4gaEvqF1cxgRoSPuqO5r28PQnhJOjCKjFI4qeIzKtiZxnFQprbuzWg1/xVBpX9kvqlxFp/3msUmKKMey9aWE4ay+vUlWrx5+Z316Gscvwspqq4Jy77lfw54YttSvrjUdMjy0hLzYlbaeOQBmvbhlmDy13oRST7HrVaqjBRkvwN+28EaFJDcXtvqMsTtGF+zruYs3t2rWvRrVIqML69exz0sXiVUUFFNdwurr4h3WlvoGjSXiaanzRyw2/JI+9zx0rrhDCYeXNVabZcaGAo1fa1UnNmV9nW9tjJPrt1Mkg/drdkqffA9K9KhdK6Ss9jpVZvSMEvQdGWjtgobYF+8okxkZ4rWn7XEQtNW32fn380bypycSh4tj1jxFpkth4b1CC1uZnj8ozJvEiqwLoe3K5H41GaU6iwE1F2bsTTgrS5k36Hj3xk0Dw7pniGe801Bauq7UjTgFP7o9s+tetlrfsYLrY3UXGCvqz5y+K/iS+vpprC3eFVuH/ci5OCADgnOM+vSvd5JShZC9pOEbdDgTZzaTZGHSYQkNxe5upWl3POwGcBeuPcgD8qIqlT0S1DlpwXM92cXr8MaRAxxCR5oZpGCrtRXJPQ9wMZz61LlquXqY1W3rFnlcfijV7+5liutEvRK0jW08xt28uKAd1boNw4rmxEql0pHFByqytJPQpr4s0GWxa8lNsRHPmVTPuWCOP7sf4nnjvU05RUbpo1q1oRpuzMy+vlvbGa4e5W3e7P22UZyUQH5ck9/QVr7aFrtnJTjzq7NJdVEdpHfXkaFLmAiy3HBiIUgyvz1pxrt13bWLX3ef+X6nqudKNNR6nrv/AAS98N+FPjj8U/HXwD+JnwitviBpep+EJNd0rw1cS+Uh1CxIeJkbPysys6nthsEV/OH0msxzLIeHcDmmX4l4eaqxpymle0J6fcfOYtYacpRrP3ZNb7bny9+0xpXw10X9qjx14b+AF1eReD7bxA50eDUbKS3mtUbG6Bo2yQEfcgPOQoI619j4aYjN8dwpha2YTUqrSvJdfM8zC1OXEToU3ov1ONKa4gOoWd7JsT9zcS+Wd6gkg5GOR71+iqpU5XJS8mdjqVY1FaRRmg8T2+q/Z7nVPLAQokwYgOuMhT6Yry6k61SpqzKX1z2zjKWhX0/Q7uOC4nBZEdN027kvk4O39DSow9nFoxo4aVFvle5Slt9SR5rC9H3TkTKBn8fbpxSqOVnFmEoVVJxqbHS/s9w63pP7RPgq88PalBZXyeJ7VLO6lumhTe0gVd8icqpJwSOxr8+4+oUKvDeKWIjzwUW2rXv6HDDF0ctx9PESV4xabS6rsL8X9I8eWfxy8ZJ8SbCax8Qf8JPeDVLO8Vlkjk81ichiWxzkEk8YPNHBbwFPh/DxwUk6fKuW3axtKr/bGZVsXFcsZybSfRE3gmGKTW7a3V2VmnAZxxu596+yrTfs207M9Om6dBrmPffgl4hTUPh7NYyXBlEl7PLiRi32ecSYwuRwGTIIHGVWv594hi6+KrVZb3/A4qWKnVqOK2uat5Kc7V6nrXz1OOly3eT1IAoAyR1olO+gpNRR9I/sIfss/Gfx/wCNrTx14aeWw0+OQeY5U4mX0r18vwdWXv8AQ8TG4lc9on65fCj4bDw9oMUGpqHlWMB2Pc19JRhKCseZVbq6S2O+06yOV8mPIU4xiulPUyatojVih8pyrx43DkGm2RJIsaRcy2TukK4LZAJpK/QlydrDLuC4kctKBg85xScWVFPqSQRGTGyEB8YBNJRRocd8ePhr4u+KXwj8T/Djw7qmnrL4g0S5sfK1K282Ji6ELlfUHBB7HmhUud8re5jT9nOpGTT0fpsfB/wh8Y674m1VrnWYvJumZY7q2Jx5U0Y8mVD7iRGFcbnaVu2h9TClCCue1afICIt1sjADqyZP0zWjcnuYTd3Y6vwhauZWWSABG5HsOOh604Ra1sROLXU7aCWWOIDa/wAi4GWJyDWvPKKvYwvucr8R/PKENNJnbheT6VzYuS5bHVhJS5jz2C9ubdipuH5O0KWNfOVnHm5W9X+J9HCUuXRnT/Dg+F4v7Q1jVdOs7y7EOLSK5thgjIDPkEHIz71WFwuHUZSmteh52YRrVnFJ6LcwfETaRqWqtEmjWyvHGSWUY3HHvVKlSctFqXBcsEkjxP8Aaf1ZtH8H3T28awTnS7hEjViQcxkAYzzkkV38jp0HK1mkTCnLE4uml3sfbX7OvgweBv2a/CHgmSPa2leHbOBlH+zCo/nTw1NQw0U9zDNqsFjppbXt+hsvdRXFq0LBcKCrBq6FqjzJJKVmcXfeGtc8J6u/izwLf+TMxRp4CMpMFbcFIry8ZkeFxq5oq0u/c9ehmtRUfq9dc1Pp3XTQ5DxN+0ELA6pF8T/AU8kl1qKyWc2mjaIojjcCO4B5rwZYHNcrlJ0veV7/AC7Hs4eOX472dGnU5YqNmpa3fe5o+J/CHiC68P2nijw0y3ujXdr9ohl04qzhB1VlHK+9elhOIqNTljWXK3/w2vY8avgKMK0qVGXvRdrNW+57M898W3ss12LPRS0UixEv5wAIOMfN7+1e7GrGUU4u557oVqLvWi0YGjxvbaXcW9td+e4nJ1CVx1P90VLqc+iCcfaapaHJeGPEF/Dca9fRRi3t5rjYjk5LAdetcUKkouUpbI6YxhFKC3OV8dvJHfx60lzjDEGNerjtxXa69krnO4Ru7nGePvFzXdsLmRwjbNzgnuOla3nbm6GSpup7sUYXhb9mL4r/AB8Nx4gupV8M+E7aN5r7xDqY8tpEXlhBG3MjEcA429Oa+WznizBZVCUab5qi6H0WUcM1sVXjCtFq7XTv37I8W/aO/bV+COjeAL/9nT9lL4ZapoPky+Ve+PU1HytR1EjhhIygNtzkbQQB2ryMtyTM87xFPMc1a5Vqobry02Pq8zzbL+EqVbA4SbnVkkm4pKMX5N6tnyRr82oXdu1zfandXcvQzXly0rcnJOWJPWvuo08NhqLVKKivJH5ficwx2YVF7eo5W2u72ObvEmdtqygYrhdVSlqa06M27soS6HNeEobkgN2FbxxMaaukFWlK1kyjdeB9Od900hYg87jV08xrRvYw+o06rvPoaumWdlpVl5UAAAHYVyVJVa8+aTNFGFFWRUZ/tExI+7nrRW/dJpMzjBSep3ng/R4dS1y0t7u1uZbRp1W4FmuZNmedo9a4KUXVaTOqo3COm5+gH/BPz/gmd4k1D4qxfFKfxRfxaHG+7TUUGGV4zziTHWvq8uyuNCXtG9DwMZinV9xLU/Vn4S/BDw74Jt0stB0WK3cyZcoADI
3qSOtevKy0icSVviPUdG0y2tbrf5YiKHJUj71QlqVJ3RvtYLdyGa0iX5sBmK8GiW5DdjRtIbG0iVHXaDw3l9z70nJLQXvdC5E0YBt0QgN90keveqSctBO7d2SppyaSFvmnZxjBXOQKJQ9m7maqOvLksWpbaynh3R4VmHJUc10RcHG6JSqRlZnAftEan4o8K/B+/vPDVvcyymSNJZbeJ3lhiZgHkVE+ZioOQB+PGa8PP6mJWXSVFtN21W6V9WetlMcLVzBe1tono9m7aI5P4V/E/U/iDdajbz3tvq9rpV/b2VvdJaqsjMIFeUkqefmYjBGeK83IsTjsSp+2qc8U0k7Wf4G+Y0aGH5PZxcZSTbV3bfTQ7XULvUL+9FlYwyocFQm3AOfevcqXlLliefC7jds0rHRZ9JsgJ3jWVuS8hB2/h0reNH2Ss2Q6ylPRGVq/hux8VeaBHcNJjBu3fylHsMc4rmrU4yeh3wqOnFc1vTdnJa58MNH8JaLLeRfEHVrcshL7bsshJPQZ/LiuKtRUI35rFqvPn0hoJpPj2e3063tL4LJaW6YW3aIxtIMdc1dKt+75U9jlqxvJ23OB1uCw+JvjmDwnaabBaee7NNDAdwWMnAyeOTWaUa1dU/vN4xnGm53uN8R/D3V/hNplxpngjU4YYIZMfIwLxvz9xc/MenaqqUlQbjB6C9pGu/eWp87+Knn8K+Or3T7LVpLpdeRn1y6uXSO9kyucKg5wMEZwccZxmvOjKdOq4xe+53UlTnFK2q27C/B34UW2g6LeR6zbXQt7x5b6yvZgZGj25Pzeh+tdeDoKLbZriEnNO52tzPDq2rXOq2+pfbbKTQ1eTHO1hjJI7HHP4V2TUnPmicrnNxtY8+uNY8NeKItVsW1Yf2nZjyriESY3oAWV1HsDXP7ZO/cpRdOKkz5q+IXinxB4n1G/sbSybEMzQT6hKDslAA6ZHpjB6157qzqTsdVJcmrPB/H/AIKk1dB5/iK7WKPLQebcNJCjA/MChPAJPUUnzt2ud1OEZz5mcNc6RLZwvG9m6qRzhdwB/vBh1FdNJt7nVLTYm8MzTGfyJNquvAYcZrqp8vMrkxbUjL+MFxE/iQyYQymJd5xk9Pfoa8/NJSpy0NqkW0mcVJgHqeeleHBO9wU3JWHxEA5eqlrohOdtjtPgH8H7n4+/F7RvhXB4rsdDi1GR2u9X1GTEVrBGheRsfxNtU4Uck4FTCnHVyeiJ5KtTSK1Pse0v/wBmX9l3wtqHw88E6Jq0lhqNsYNa1+K68nUb9O58zBCITn5F4wec15dDO8ZRxPNh4pRXfqe1HLsHhqS+sXbZ1f7K/gz4TXljN4s8D6NqD6aWJs21dAXjOfUY3kevvX6RwnTzfiDEJTg4U073Wn4m1KnCdRRpX5T3MwaVrly/9qxzTzOFDzq5BCjoo9BX7Osqapcqk159T1qcJ4dJU7JI6K20f+wIYdWlitViui0drE91ulUqOsg6gV8tiJ1KuZ+xptvu7djm5aOKqyg20476aa9ix4s8LappnhNPFF1rVrKbl/8Aj3hlBIX3HavosFVquu6HLpbcvC4qnVxf1eMHp1Nf4JR6R5iSeJZPs1i8DmYock8dOh4riz6rUw2X2p251bToZ5wqsIv2OskyO/8AEVlpuogWnia42IzG3iWPaFTOBnI+b/PFfOYbPOJc1awlFRptLWTV9PIxi604csYrme5V1TxXqM8EMF1q9zCq5aIqdu/PqOlfSZbw1PC8tbEYiVSSbfZam1GgqdXmqJNmbqJ+z3J+1RqTBHiMxNuMmf4j7/yr6ujCHsUoux6MFzK6e4aOY5dW+3aoBJbogKxA/f8Ar7UVYYp0UqDV+5tXVSVLkpvUj8aanePo0reCfCNzqF4ZFaHT7O6Ebld3zbWIPQc89cGvMz6VWhlUpOeun5mEabw9FynPXzPEPiY9vPNe6tJPMzx5Pzjkeo4r6PAVL0Iy8kaRU5ySPm34i2Fnq2pu1nGsc0gkFv8AONyN+PQetey6jkrRN37z5Tx/xNb/ABE0C/NpfPHfQzRASX0IwwDE9+gAHGatRafM9UcuIpzpLmucudWu9RF7YQz+YLVWRTG5KBQcBQTjOetVeLs0ZUpupqGpX13baVLoMmtTJFPZ+ZOlufvnHC+2P61lOpKSsayqKGq1PPV+G3gu8vri2OmQOJNjFFPBJ+/IxPXH8645QovRxOX2FKoruKOV8Q/Dq2tnt00m8u4mlR1EL3W4FFOQ7ZPT2rmeA52uRtHPiMNFpezbRQ8Sp8QbKVpE1MXkUFv5hLpjMRGP0rolQxOHfNF3Vr6nLUoY+mvaKd0j6B/4IrQ+PoP+ChHhSw02C5t3v9L1S0muIUDM0LWrtuOSMAYHvxwD0r8F+kTgMXmfhBjn7LmnTlTkrK+007/cfPY2piPY89WOkWUv29PhXD8S/D95+2b4av4Jr7StVTSvGsloDJBqDNLLFBeLMW+aQ+Vh1woBIA+7k/M+GXGFTB4vC8P4pNOdNTg3o9Em01/wT3s5yilgaFDM6LtzKPMvlufL6xX98GOnXQEbDcisuTKQMkN61/QdSpUqtypPT8zhtUq+9F6GNfS39/etMbpMK5/cIMjOMZI7DiuOnCvUq8zZinVqTblLQvXsN2IngtXJmtLT5wRkKCc9e4Pb0zXdWThTutzasqsoNweqMiWSVo3llfdceUA5XnjHBPr6GuJzU43k9Tz1Kbj771KN48ryfa7biWLa48tiuCOSOOnrXm4ynTrU5Kyd1Z+aODEUlVk3DW259HT+G7//AIKH6fBrumaxb2PxQ0XQEh0qK9ljjTxnZ2y7WSaZiAt/CoCgtxMmz7pALfz/ABx8/DbGunJN4OpNt9fZOT6L+R/+Su/QirVjGrzUnbT3U+vdP0PF/C2m6s2rCz1K3nsr2xmaOS0kQpJC65DKykcEEdDX7TSx1PHYONalPmi1dNPR6eR14OtVx1NTasfTWmeA7PTvgw/iXwvIf+EjsdRtLq809VAW+00Aq7IcDMsbkMV6srN/dr80z7D4Z1qkXK0t7GM6VWGKjKG3UfIElUTqMBhnB7V8JJq9kerOzV0ekfsx/s9+K/jt8QLLSdL0aaSxEwN1cBDsAB6Zr0suwFSvVUmvdPHxmKVNcqep+zX7Pnwg0v4WeDLLw3oumpH5EShio64FfZKNOnHlijwZOUpXZ65p1rbm1MZyZD1UdBQrWJ5m3Zo09EIt5lheLLYOPrSi3cUotkt3HdS3W2RCRnjFXZt6kaCzWz2+JGP0ANU/dRctEP3yXMf73gY61PM5CjJ31It9xgJE3A9BU3sbbajIlmgnS7dj8jBuTmtF7upL1i0j8+v2iLKP4F/tp+J/DpQQadrlxF4j0kkbUMN1kTqP924SQ/8AAxXPXhCFTTZnsYCnUrYWKk9tD1PQtS0mXTDqa3yNA2CX3DC57fgaj2iijV0nSk02dx4Wu7a4tY5rMKwLAqynjBFKneWpyVZtvQ64anbhAtwyxqihd5BwvPU4Hat1poZOpPkulscb8TdVtp5ZrexvIpljkaITQsSkpGRuU
9wa5MRFvSR2YNtpSta5wO4LFudAW659zXh1uSOslsfQUW9Dfj1fxhrlr/wqDwV8Mbm51HQLU6lqetvF9mijt5l3FFmORK4C524q8Oq2NpunBW5Xv6njYvF0cJXnWlJ+9olvt+RwXwn8Sal8XHvNTj8A654du01qXT9N03xDJGk16ikf6Su04CNg4JxxzWjwjhjrJ3sreWtjSliva4fnkrI80+LOkt47+Mvhb4ZXbB5b7XLeCSNfmDBZg8gz6bEfmuzGyisDJS3ei9b/APDnZh6lq6qLaOp+hehXdrCBpsWBH5SooPQADAFZQVkkeTOTnNyfcwNZjj0LVJLy5Qy2znkZ+6fU1q/dVzKpPmWhTu9YkuofNtrRTCPuNE3Nax5ZxuRF3Vmc94v8M6DrSG41a1RUkQrzyc0pSg9Gbwm07Hgvjb4KePvDd8PFPw08dalpywsSkEF4wVxnJUr0wa8XE5HlmKSc6d+9nZ/f/wAA9vC57jMMuVWmu0kmcJ4o/bS8e+EvCV/4X+IPwL0zWNUkvPMj8T2+5JUTPIIHDY5r5+PD2Y5ff6tV0vdc17ry3selSzHKcZjoVcXzwglZwVnF+euqHf8ADWH7Imk/Cyy8RN8ZHtNWupimoaBeWLJJHIeN5PcZrlhm+dUJOFWk5yTeysrd73OupgcoxuOnGm406P2Zc2r8rFnVPGP7Mfhvw0tvr/7VfhqxW9086jCunW8t3J8xP7lsYCv7E104fiLF1a3s3R5U02tG9eifY1pcPUknKEJNJ2blKEVbutW2vkeOeK/2s/2MLbQ7G/bxv4z1m/jusappUOnRW0TxZxujmJYg47FfxrHFZpxHUoQ9hR9++qeit5P/AIBq8qyCniaka1eCgl7rTcnfzVkrfM43xD/wUr+AXw5v5p/gH+y2NUuvLxbXvj+9+2vbvn7yIoCenBH406eW8V4+o3XrqnBrZav79DF4zhvBUUuaVSS/kXIn6t8z+6x8s/Gj45fGv4/+Mb3xh4v8a6tbC8mLCwgvXWCJW/5Zqi4AXtjHavcyzh7KcrVlTU59ZS1bfc8HOOL81xtZwoSlTpLRRT6ebVrnEHwz9gjLKMhThiT196+lcW1dHydWUpvmb1MzxJZqNLkjgUEKRyK568P3bCh7tXXY5GVNrFWNebBWR6fPcVRtXrxWVW7JV07sq3u6TIAyOxzVU2luaOcUtCtJDM0PlkEe9bSqxT0OflUrsgjh8tcFqlpTd2Yzm7M/Rn/glZ/wTv1D4r6zafF34haUE8OqUl061lZxM8gPUggDafxBr3MqwHLL2klpbQwzjFONeUKZ+wvw7+G+jeHrW3sdP0pbWOEgBE4AAGOlezNJRsjxqaa1Z6TpWjPayhrK23RplhKV61ny2CUlY39KgW7D3U6KwBIbIxRzIhyb0NCP+0fsn/EkSJgvVM4+tQ3K/uktQT9409Pt4mi8x4gZQOV7ZrROPLe2pNVtaLYs21wl4ptpoSjilGrzaMxnCdJ8yehMGgjiMM0GV7ZOauUtLSKtKU04vURIbWJBLAvA7A0U1CLuhynUbtIfNLFLbt50I2kEFX6HinVanHYyVOXNozyxPA+h+E5L0eH9Nit4r3UfP8i3ULGrCNUGMdOFHT1rzqOFhQptRVr6no1alWtUi6jbsrGzpOp3UFs7IUj+b5m3Zc+2fpW0W07msacbLQjGralKyhYk2ryZpiCQfXPT8ql1JSeiFOnFO5Dpvi7TfEl9J4a0S9jv5oji5O7IQ+mBwal1YzfJF3ZtCi6cPaTVjVl8LaHpU7aprub6VVGyFm/dp/wH1qpUKdP3p6sxdetiIezg+WP5nNfEG90HWQbTVfDkTM8fyxBsFR7jsK5JxVTRxHCEqXU8r8LeD73wl4mv/GHhTR7mWJrQrNLFlhGw6AFuv4VFHCOlUdSK0OmpWdWiqb0Zw/jHU/Hty0t3Z6c15qRimmtrdTnymxgMR6jNZS9ok9Ls0pul8MnY8i8XeDn8C/FDTPH3j3XbZby4tE028nupMKskpyowe+eNx9cVmqapVIylu1uddJv2ThTXU+o/hafCHhiCG38aIJmWxleRpkxGFAwe2DyePY17OHlRpP3jzq0atSXus+c9T1n4U3fivXvFOga6+mI8TpbpHI32dGTuyEAYPc46GvKqYilUnKUHZI3qucYqLWx4L8Ov2jPBvjPUddhh8MQ3Ot6ZqskF7IqMkNwOm+J+4I7Vx08TCcWuvkdM4TlQXY81h1DWNNOr6ANSZtPvdRcxQSEFrdnztHPPFZQag20bRpuSSPAvEev6tpt21jrENxHd2d1IgkgTdHImfvYB9OorP2ltWeirJKKMl9SNzcMonSMbc4Riv4gGuuhNS1NLNLUlsY5J7hElm3At1POR9a9OnCLaKgk2cf8AE+7F34mdTNG5iQIGUYJA9R614+cVIuqoLobVbtKKOXLBn5FeQm+UycXFEinB2E59KSlccLbFm3EplSaN2RkbKuhKkH2I6VjKcn73Q3dlGx9C/s3+EfHX7RXiO00zxbr13c6Fp4AnebkED+AGvquFeFa3EWLUpq1NGmHhiMdWUOZuKPvLw14e0bQNFttC0K2SCztkCRWyDrgdTX9E4HLMLlmHjSoqyR9bh8LGhDlSO10Hw15Vl9uEkLzbSQFIKwAd29/avJzLMKlOuqNM5MRikqvskmRTaILC6mluJhMjpv3Acn6Z6CuzBYKlRXOluejh25UuXualh4r0LR7RZtRskuk2/wCpkPGexauvE4epVd07I5atCtJtRfK+5oWHxOsPEelLFPodtFBBE6CPTwqY9CWI5A4r4DMstx2YY1Uack4dWtTz54SopumpNt9WchEmrSXsk9zqlxeSONkUTKuI17YAHJ96+pyrIsPlknU53JtJanp0KFOlDRa9yl4quIHhfT9ddjsjxJhymPYehr6CNONSOmx00qbcroZp9/HZxCPTEl2zj5zcNuY/TNddOhBKzOhpX1FbX1M8UUKlWXgpvAGKIUYUIKMFZG/LHlOq+F1h4317xxa6f4VYQ3ju2yaG6CrEhU7mcsMDjPH5V8P4i8T5DwjwtUxWZySTWi6t9EjycXKlRwtWeNiuRPS13daW6LW/RXXmfP8A8RNOfRta1rw2+sRSfZruaKWWFsqzbzznvz6V7vCOYwzbI8NiqWkZwjJejSH7R1eWpFWTWx8y+MYdR8Pa3OJ4UuQ4dYrmIFiuTzuHavtYRgmmdi51Hscl401K41VhabwkIt1EZd/lAA5Z1A6ZzxU1HZGeJcZU7NnhnhiHxXrvxDutViu7ay0fTyY7WJn2LezE8scjoK5qHPOq5t+6eDhaeKnjJSk2oEPi7xFqFkmoWOoQrDcrMGnVWyzxDODu7Lj+VViK8YppHXXqKndbnNp4+0x57xIEjdUtVVRE4OVxk85rCNSlKL1TDD4mFaHuO9jBt/Hml33iS+mu5w8EVsscBR8DHQ8/U4rejiaTm7M53jac6ji2aet6xDrmqXP2CQJAumiL5emAOTU1qrqzbvpY63Uo1aPLc+6v+CTH7Jtvfa1bftvfHq8vNC8F
2ME2j+Co7AlJ9VvnjaMzNjBEIyQPUn25/PuMMfUxmX4im1fDxp2mkr3t+p81ia2KxWMdLDJWjZtd0eB/tz/tcfBnwR+ynJ/wT2/Z4ksdRefxP9p8VajbaZgxeRNI6wtIQGL72PTIOOtfzn4a8I8R8QcbLijNIunSpQ5aMdNU+tl5d9TTiHNoY+UcOm7pWt0S8130Pjuw8RWNnp1vqcUA82Esoj8w424wxx2PpX9ZYSvhnhE1pK+xzYWpTlho1L7XW/6FCTWbfzbpYUXy7lARJnlJh+PfmuatiKUeaz3LdSnOT9ns/wAyTT/Ect4jbJgk4CpI+R8xHOD7EcVOHqqpS1d2bUqsatPlTs1v/XmUbu9BuZrjToBsYcHbnHPP4Vw1Irnbi9DgqckKrlHVD0itri4EsroI2jOAp7+n51h7Snz26GtKphpSbeiaZ61+xJpF1rnxk0rQ9J+H1jrs3habUPFOpR6neTxWZ061snklhnaA7kR3SP5gCQQMZ6H8U8W40MJkknOvKnOs40o8qi5JzklzRUtG0r6dj5yUVLFRw97ayd0rtK3Q9Q/bJ1bw/wCNf2sn8c6Fp1taya34W0fUNWtLchkhvJrVXZMhVzhSgzyT1JzwI8HMFjcu4MeEqyclCrOMW93FP1fU+gw1L2cuXyR0+nyiDTNJe2kwZLbYCoHBz61rxHDmzG7WtiMQ17eyPVv2fv2Ivih8bfG9rC+mNb6M8ge4u2P3lz0FfP4fKa9Wum1aJwYzHKnDlhufq3+z3+zn4H+Cvhe00Hw3o0MbxIBJMIwGY+tfWxiqMFCCPn0pTd5bntGgwWoVosbSq55pK5NR30JtAvIluZVaTgE1m7h71zb0F47q9YqxAH8ZrSlq7sUYqMGi1rGpQwt5MRXd2I5rZys7GD+IqwW13d4d3OPSk9dzVK6uyymlzuBEJADjkE1OlxO6Y8aPLEvmG4XIAyAetEktzaLUkVb66eaby/KAIXHAxmhu60Glrc+Of+Ct3w7ibSvh18colVH0vWpPD+qTf9Ot4u6Mn2WaNcf79c9anKpFWZ6GCxnspOna9zJ/Zo8L6LYeDdYu9EfxRrGhXlzGJdQ1+1hWC2utih44NjFjGWz8zVpRw0VTctbGdXEVq9dRqWTX5HrngxbayT7LDblUjXEe1ahWUrI1lSVrtna6TIJ2E0UQQl+RjAHvWsJW1OdxR558Rmmm1O5klyzCQ7iR35rmxTc22elhkoJHIwT6dZzLd6vFPNbWqNNdw2o/eSogLFFHdjjA9zXz2KUo03Jq7XTueo3VdNqm7PubPxH+Jvhbw1pGk+H7Px9qtrqviANLb+B4lSecIRlY224Z3C4zkkL6VhmGa0cPhVSTcZbtWPEwOHnVxbUouVuv528irpi6bZ3Mmr6vY6hfpFBGE0/VmCtCcYYEptOOen0Fe1ltp4ZTm7ndilGM2qN1E8t+FtlH47/b08OXEVuixafpV/qaLGMJG+0RooHp+8bFGYxVVU6ae8vyJo1XClNb7H2PZX6yXSqx2SocOh71o42OV6RbLfiCSGOJjcQq8MqhTmk30OX4tEcX4i8Kan4Rtl17wncfbLRsvPaliSnuK2jBKnaJcailLkktTCh8VWXiaPzJpgFR8vEx5BHUYrGUU3ctRnTMnxpq0lzZx2FpKAkr/u4o1wSPc01JOy7mlOWtjhPH3hHQo9FabXNMhMnKQIyj5z9P8a0lScfidy0k3vc+VfjD+yp4f8U6s1yunIjMhJ479q4K+HfNcavKokkfPmqfs0SWNxfWsLyCOObDjqc5I4rOjhly3S3OqrVcU4t/iZMPwOlh1J9DvCGZk3wlhwwodJ8xz86a8iDVPhjYaE8F3JGCj/LuzkKe9ehCnaKOepUcXoZPiex0XTmECKMs5IlUjij2Svc5qs7nDeL5I5Q1tYLwM5kA+9WsbGHtFs0cvIn7nY3OR0P8XNKqvdLw6c66M298OWsymRVKHHXtmvKqwvG0EezKMI6GNqmi31ivmtA/l9n2nFcbVSEfeRzyqRehmlSW5HFTdWuZJNsZKSqFePatYRTV2OScdCnvG7btNKr7q0NPZpn9P37PnwU0f4ZeC7PRdOtlhjs4VSNCecAdB7e1ffvlhoj5rETlKs2z1Wy0UyTB7OEqxh+YtyGHpXNLVmfPdHT6LcXOl2JgEhcSR8lsAg+mDScnYykrmvp+nMg+zvLtMg3Lk+tQvMuLS942bWCOzTzzPGoUYKqBzWl4wRhOaqPlSJsusZmtolO7H3RSXvPRBBJytJkyWkcxWaWMh8ZyDV8iRlUm4XSeg6SNwNjWwZccHNXzJrYUJa3TsJHGxx5cYUjtmoive0KlLe7I9YuQIhFNJ5ZJ4BIw1ayld2YsPF3vHU5K7kO0oYAcPlW29zWM5JKx6HNaVyu1nFdj7HDGrurAydAoOe/vXPd9ClOn9oy9UiDo6XyMY4925XkAVh+HQVjLezOmEpTV1oWvDF7oGkRiLQdGt7fcu+d4QAWOP14p0YQhL3UkZVlVn8crnO+LfiNZG4lHntGOQjMRkc/e/wAKKsoy6msYS5FE5Ntcn8QX5jaU2lhE4N1OZBvmHcZ7muWNROVug6kfZ+9a5znxJ+KxtYzo+mXf2e0UOLOBJtp24+8xzyT/AFpVcUmuRPQunCMpXseUfs5/F7WvFvx18U2onPl2WhRILhAWAmdmBGT3xissvrJ4io49joxODcKMZeZ0Hx78MeF5tTS28b6XHqMaxr9vtLmMMspPTIbg9fwIqsTzwn7yuXTqyjTtHQ47xZZ694S0GXQLfxRd39jBCJ7CK4uN7xwsRmMseWA4GD2rGFOUU+Z3CE3KSbVjyL4veL9J8P6bc6b4os7C1u7+Em3ubeIqFjzwgwcEnAJzXLiIKmrNmsYSnLRXRx3gTxr8OvC3hy803UdPtb21urMkXdrDseF/Uj2+ppQnB0uXoarnqStseOxajZ63qd4NLuhJL5rMrB/llA6ZPY1jBKTZtN8tkeTX1vqOqeI7y11a3k8xpSY9pB3D15PJ9u9TzJTsdFFNRuZOqyWNq0lts3SqcbXi2nH9DXZSlFLQ0b1E09cukittOdxXtmvTptaNGkFJnP8Axt0CHSPGARI5Y5Z7OKeVJYtv31yCPUEYINfO5rKnLE3i9ep0VLxSOKIVTuP6V58btnNNuTHwkudzDjsaJvlLglFHUfDTwLq3xE8W2nhbR4S8lxKA20fdXPWuzJsrr5tjlRh8wk5TahHdn6L/AAY+DVl8K/Ctr4X02AJKoBmwPmdu+a/prJMqpZVgI0aejVrv8z7HK8PDD0NPme0yeCtR8L+HItY12I20Vz9w4IYj2rprY+E1KnTd2azxlNtqm7tCeHteOt6d5Nnpf2e2t90YRm5c9CW9a8/AZX+9datq2cWFoSdd1Zyu90PS6vZr/wAiRgImGUbPXFe/KHLtse1GPLTv1Gz+Fm8UzLY+WQshJlSOQYVcck5xXFjMQqOGkpbPoRUqRhFy6oSRtK0uxXTfDkyyxQJsLbcBj6muXKsLGhRvGNrnLFuc7tWZQn1Mwyh0AWR
1+Zg+M4/lXsOjSfvJa9TopRlezM+62zhXmRGkZi22Rdw+uD3rWMGrWOuNo6FC71+9MhitFRY432tufaR69OTXbCmrXKklzWIdHGra34ktdF8L6LHd3F/OILdY8s0khIA+vWuLH43D5bhKmLxDtTpptv0NHUpYWm61Z2jHVnfWnj6PwR+1/wCFv2Q/CWvyfZdLsjffES/t7bzJbq8kULBaIx6KpJPQ9vev4N4ghmPi9lWb8VY9SlhqMnDD01ppB6yts7/10PFw855rlGKzCpFNpfu03ZKKer+48F+K1hbaJ448Q6fJ5iNaaxcIsF4gV4z5h5YADn8B1r+t/CfHxzDgHLsSla9KKt2srHpzm66hUjazS222+f5ngfxViL7r6CJoRHLvlSF/mkGevPSv1CCbV7lVHKS5VqeY+JbOyupwBazmMwkxhDhnBJyOO3vVWvuczScfeOA8Z61pWk6ra6HY6dHHNcgpbxTw7nnwM8Mf4Qa5q1SKkox3OHFYinTkqSvd7HlXxB8Kaz4uu7q71PWJY1WIwFYMIWc5woA6jg1w18O6y5bnm4jBTxiab0OU0v4MaLp2mI73s0cjJiWLzyGJLY2n3P6Cop5fhqMbI4MHlVPBXjFvzI/Efws0jRrWaCKGIx2ThElWY/vZGPb1x69K2eCoqF0dmJwdP2S5I2NU+H59G0qfSxuDNb7WyDkkgMDk9sGtXT9lBrujSlgpwon6afAb9o7wb+0d/wAEufDnw8ljMC+FNPbSNbXS4mmm0+7V18qZ4kGQrddwr5GnClKNSg/t3T9D1Mgw2G+s+1g/eas07LZHyn+2X/wTakfwvN+038G9Cm0fxHHZC68deCHhaUz5xt1K2TG5YpchiuMqzYr8cwnEGb8D8RPJ8zj+4lrRqNaNPZPpoup8fUy2pi86nUwvdp9nY+Q/jB+zn8dfhnPFP4y+F+q6Vc3EHnLbm2LxzxHGZEK5BUZGfQ8Hmv0LBcQ5XnFT2uFrLmvZpdyc0y/E4fCuvCNrOzS1HaH+z34n8V2Gh2mh2t1qOueJrhf7H8N2MOZ50DhPOdiNsMZJwHbuD6VrxLmeCyDBxxOMqxhB66vXtovPoclHByqqEY806lTVQitdN230R7cf+Cb/AMH7iYP4i/bk+HvgHWfmGoeDrq9udauLTYCXZp7SERk4A+UZ69a/M4+JuKjWaw2ArThpaekU77WvY+lqcKYirVi8K+VyV3FSjK1tXu09Fq9Cuv7Bf7M2nXVvFP8A8FMPDE6XkTG3Om/D/VJgyg4Y8qoAHJPfAq63iDnzg5U8rqad5wRi+FMfKCkqnxXtotbb9RmsfsR/ADwRFaeIvGP7bclz4f1FmGnX2gfDq5Zr5BklYzNIiK52nCsa8p+JHEVebo4fLb1FupVYq33Juxy/6q5jpF4iKctNl/mb9t8Wvgh+xz4Y8U2n7IWp6jrknjaeyabVtejgluzpUGHuLGeIL+43yDlcncjgZ4OPlsZhM445z2hXzumqSoOVqcebl55aRlGTfvWWzezPIzXL4ZKqdLm5pS1bW/p5Hl+rfEjXvix8Qdb+L/imSEap4h1d767jtYBGkZdt2xFXhVUYAUdAK/f8kyLAZHkdPBUG1yW+fVtv7r97nfg6U/YqS7dT6R+GXwu8Y/FbwJplr4K0uSa+W78uIheFyAQSa+Jz6P1nHtU9zDMF7JprqfrJ+wr8FPG3wz+FFlY/EC4jlvlhG7YuMe1ZQhOlTSk9T5WrJzkz6O0SKzgYecAeOF96NQ1Rbt7uH7S7EYXstDkkZTiri6HDJdamVt8KpPzfSoVnIuDvA6q9uLfTbUW1uAJCOStbQ905pvWxnCzubh1mkfr61LvcqKT1NK1jaGPy47gEjrzUtNq5V2QXWj6/dzCa1uxGnUk1i4SbNoum1dofDa3llEDd3ok4z61aTW4o1KdSKcNmC24vZtysM46niqSVhN6HiX/BSfwZF4r/AGHfH8Jh3zaRYQ6vbbeSr2syTZ/JTSk6nK4wFSbjXi13sfGn7PVnqnibW7aDwrqDRXWoQZga71kQW4k2gqGj7g4ODxyawlGTWsrHuqpCl7043+Wp9SfDDU9Yv9Eg1bUIkXZuiuXSQbFkjHzfMfbJ+lOg5T6nHia9NyvE9N8Nw6ZMj3C67YTeTEssiQ3qM+G+6QmcnqDx1rshTTejOCWI/ectjyzxZqcOqfaby0uAQt26MVbPPvXHiPdUme1QhK65jM+GOow2/wAQn1q+tYrq00XS5b25hlg8wO7fIgI785/Kvm8dmP1HFU3JcyfSzb7Lbz+49iVF1KDipWb87HkfgX4hePPilf8Ain4x3PhFfDzQ317p/gi6j0rF0XVW3Xm9gSm48L0BCgd+fNwGVVcTjKuLnNSi1e3Z6af1qebUqxkoxlG1nZaPXfV/0vvO90K51uP4f6fJ4gvpLvU7m2VtYupmzLPNt3FiR3JzX1mHUaeHSsc0m1PdtHKfsfObv9qbxD4ma3ymnaCYUuDyp3XCKVH02H86wxNWjUxFGPLqru/fa33fqdeFUKeHqTb1dkfXHivTGuwNT0jiVED/AC9GBrsq2lHQ4Izv7rQ2x14ajALHVGCsI8OhHfsa5IfFaRzzjJSunoYmsXt94Su2cyu9q4xuU5H0NdXvQ+E3hyT23OT8aeDNG8ZImpaHqraXf5wk0LfIc+o6GnKFOove3Hep8MtjzzxI/jL4d60l54002W8hVSIryzXcgX1IzkGp5IxKioW91nNah8VPC/jHVDcT67CIIjiKKRgCWHsabnKUjOTUHYTwLo+k/EP4nRaBHNDLbxRS3uoyq3yw20S7mLEdBwBn1Ir5zizOKGQ5FVxk371rRXdvY9HLqUqmKg5rS6/M+YtVvY38Vy63ZaXNNZvczNHLG3y+WWOMjvxXo4B1HhKTl8Tim/VmOZRpfW5pbczOR8b6vp2oaqjQRzWzWsuIGKYYqf6V6CgnucbqRirROG8Uy3GryR26IJmY7pFdMA4+lNy5Ymcry1ZxHivwnqFsRNcW7OjNlFY8JWMptqzMJxdzifEsMUEpgwAqnjHQ8VKlZmLg7nIs011PHFaoCzSkYx2rbldSyOrDzjCaOs8M+Boru/jfUweCP3eP6V7GDy2EY3kelK9XY6n4hWHh3TPCckd1Ywqu0hQy81eKwtCVF3iFShGEUfPOqWUdvI9xbL+7Lcewr4SpFLEOC2JilGJh3d2yk4PFdPLy0yZtyIoZg3zN1zWFSLa0FOcYM/rMTTTZIAiZIAeQZ4AFfeVU3Jo+ZrP98/U19Ge502T7XqEWVc74SFJ/CudNo537zOk07zNQm+2CxwD9xWXGaaTepUUtmba215OiXSWeNv8ACRzRKMr6IfPCDcWzRlgsbq3DABnUcqDjJpuCmjni6sJeRZgbZEpYbTjhDVxaitQa1YXKXFxA1tJHtRxgsGwayqc1WNiUqcJcyepFZ2X9m2i2dqzsoOSXck0qcJUopQ1FVrxrTcpaFgRyErtOea6o06m5z+0gyv4pjt10lprtAxUfKAe9XOOl5G2DqNVbR2OK1BnjhBzIm48gGu
aaVtT0eV312MLVdTS0szFaWkmJWyxRt2/n07VyOp9mJrGMVNO+pztzrqzRTCSeWS7CcW5ACoOvJ7//AF6yqTdrLc7lG2j2Mu3v9e0zzrvUL20in8jlpZfL8tT2C9zWdKVSMtRTlCo+WKZh3EOjzI974j1QTqfnEKDJb3zWdbVe8xtyvZI848c+MNQ1WGSLR7PZaoTtRyQuPb1NcVSdSWqWhUILq7s4XWJ9U8RXVsvlfZLe1t2WNMYEnHU+9SoucfQ66fLTjqdH8HrLTPhzb/a9CtTHc61cpH5oXdvfPzEnt7ZrbCJYTXa7IrupX22Rc+MfiywttZvI9Ui+1rNMIXWZ+Ru43e2DjH1rTE14876mdG1OK6s8W8afHDwR4Q1GVPFXiqKws1ke3vLq4fCxOchPXAO3v1wa4/rFOL96VhyjOS91HzTr7+Mfit45f4gXHjaS6t1DR6c1kytaGPOAzLgjJ9a86KnWquTldHdQTVO1hJ7e4EUlre6lJCkP+tiht9sbH1U4/wDrGu3WMLFy0ehgan4g8JeHLC6FtcW6zSxlreZUKkH1Ix+lZR5Neh0U0pbo8gmk1bUpHa7nJLuWBVMbWzxg9s04QTe5ry30RnahBev5kV1MzurBd8gIJPXBrpjGxUaaRY0SRWv0WReduMk9fxruoSfMlY1U+XY2v2ivDw8SfDfQfjHpczSvp7jQfEsO7cYJFBa2lPorx5Ue6e9fNZhCdPHyT2YSlFrR6nisibmyTx9azclFWRn8KJbVHllEUaksxwqjqayhGVWaildsHJJH2f8AsG/AjU/DwXx5r1gYpZcGITLgheuRX7XwPkFTLKP1isrSZ6WV4Zyftam/Q+q/Cdzqk2sS61JACIXxGpH3jX6YqinCz2Z9PUtClyrqdl448Yat8QoLbSNTv/NaCNV8pFAWJR9K8/DYOhSqS5Diw+DpYeblBbkT22leH9F8h5ljt1Qne2fnPevWpJylyrY9BJRvZXZn6bdya55U+mqXTf8AIAh55xjBraoo0ldvQ3ilKDudLIYvBpez1DS0e8voTFNbyWpcopH3s9iBnmvmMfUwuN/dN8qb3PNqzlUmnFuyfRnGzaFcaJcxXNnHdJb3RP2CK4QBZADycemfWu7LuWrUlGlNuKSXl6nVGrGvdLdbjPEGm2qayl7FczSSNEBNBwY1f2r2aMZU99TqoQdNe8UdY1RIUedr6OFE+XezAE57e9dsZO1rG8oc7ujJeSG+nQwQdeBERtAB6sxJqZTlRjz6s6KTUr+R3/7OutT+GfFWsfFe6kht9A8BaNLcTsq/8fN66kRRL64wW/Aetfz19IjiKpg+FqeR4Sb+sYySjZb8p4ueUpYrDRwqu5VZW9IrVs8J/ZO8VeMZ/FPif9pnVWkj13xHrMl1bz3I3sih/lPTpjGB7V9X4ccI4XA8Exy2cbU/ZuNrdWtWCjSq/wCzW/dpctttNi7+1Tbnwv8AFTWLnUdRe9fU/I1Dzpl+d/OjWTJUfdGScDr61XhBThl/Cs8sev1epOHnbmuvwZvg5qWDjGEbKN192h86/EK8ttTv5sN5i+WDKQuFjx3PrX7NGUfZ7nXFOMTzDULy/wBS1CW0tHIsj+5NwDiS4OPuj0H6VtZOmmcsoybOD1LSfN8VSa5bLEZLAiO3Lckdm2k9h3PeubmXNZIweGhGpzyd2jlfGmq6Xpni0yXRKTxwloVjQ7RMAcNXNOqufUwqVXT2R5f4w8YeW0H2u+ljdrkm5h2FftOT1B9Md/euGWISmk9jxsXWqQrR3Vyfxlca34ouLApcsltCIpEtwoARAcAZ7kZrTESlUsovRHdVVWdONn1Oj8ea1BHbtG1y7tHaoGMTkApt2tyOc9K0rSapNXe1vv8AM7qtdQotx3OK8IfFD4jfCnxg/ij4L/EfUNA1E+WLiXTZSqSL12un3WA75FeDiMPSr1LRdpdz5S8qtVujPlke7fBb/grp8cvhp8X7bxl+0XZT+L9FjExurPR5/sMtw+wBDJt4ZFZUYrxnbXw3HPBlXiPLVQ5kpJ/Fa7sjqq5xjcJSVPERTS2lFWfzPUfE/wDwU/0DUf2c9Etfgrp+qz+OxHqttd+J9ctop47exvJYpZLSJj8ycxpk46opzkcfnOW+HWc086+szqqGHXK0oaNuKtdncoPMaMq6fuStb1Xc+W/HvxT+JmrfBG68IeENRFlLYyTSeKbayhjS51Cwd1dMSqocwxOATEDtGd2OtfXYnh6jUzqGKx8nUSSUbu6j8trnk4uniMJR5qWjW7W7Xr+h4NHLpDxOOrSruTLk8/pX13s8LSfKoryPAqSp1JO3XzYhtFtZDcNbFEZcFg5Byf6VlVjRt8KsU8HKEOaz+9ktgiljbySMdwyUEhwfQ0JYNWi4rmt5XOnCr3WqmvzPQfgj4PGq6b498QrHHN/ZfhR52DIWG55FjByOB97vXw/FmLjRzPA0lp7Sol92pwVlGVZot/DOV7nTIbYMpAKY4HY5PPriv0dp1FofSYKsvZKKV9D9Ev8AghMPDc8+veFJ9RkFzea3NJBNf3hkIdCAEUN90bT0r80xDSzivTe62PArRqzcr9Gz9dtB0qfTrdbeWUttxyB1rCc9TypJc2h0nh2TT4rvde491pxd0Q1poReJrq3jnLWoKg9NtN2uYRT59R/gmx1IF79pOAMj6VEYa3OhySjZGgbi9vL7CqeDjGOMU2rHI4K9zTniulgWNX+qqKFvqaR5UX9OjmWNXnLZ+nWrdrETXUs3d9KseFDAY6etQmmiqbdyK1WW5wbhSFxnmk7FpNPUnvoLRtMkghl2SFfvL2pRumO2p538ZdFGu/Anxv4UuAZlvvCOowMH/iLW71cGk7M1ovlqRdup+ff7Hvw01DxT4X0rxDceMbC2sLrw+iS6Vf6Ct2skpQYlVycq/YHnbk461y+xlJ8ylY9LErl11+TPpH4LeCZ/CcQtPEGrrqUjXEjrmDEaBhjbsbtjI+lFDDSpO7dzkqpTaaR7f4ZsdEnjFjb6VDh5Iz5qQKjR7AQmMddoYgDBwCe1dtGlCLukYSjed2eUfEDTtHttV1e3tZBDKsvmNFwd55BcDjqR6VjWw6kmtmenTqVZRjZXRzula9Z/DbS9W1C7t3nuUuLSPVIV+UpCUL7WPYYYE56d6/Ocfi1Uz+U6LbVHS3fuetKKlRUZOz8zL8Kar4asNMu9C8BaLq8GlPfCQnViT5jbchY+zR4b5WXgg8E19hk1f6xQqOEXGMnez6v+rnJiqVSi1zal/wARadLDoUl9Y2YeU2js8US87iSAPYnAH41vWXJF8pxzTUb3Oa/4J+6L4wn1PXY/iFo8Om63HpUa32mwSB1geS6mk8vcOCwXaDjuPavIcZxzGMJ7pGkXH6oprZs+j49Wm0xRYl2MTn91I3b2r1Y1LoyUebYra9ax3rSX+mPmRFG5PeiynIycuWdmYVxr99C66XqtuGt5yW+c44HbmtXLljexclFao53UbKG5nkl8HaudsR3y274OP61hGLnLQTqykuWxyWv/ABJ1Tw5LKmuQGW2lfakcn
KqMc5zVzqezMnBLU4HxJYfCj4o60dLj0yz+0Kha4uIAFMI78j1qaVWFSVrFxvLU57SLfwj8Evhh8QvD/wAOjdPr3jG1isG1BpS32eyDHzVQk/Luyc49vSvi+KeE8TxLnGDbny4elLmnH+ZrZHqYTHwpU+ad+aO36fceY2Wk+F/DtoqlIvLe32xxmTkPjuK+5Spwb5TyqjlOXM92c9faBoN3ezzX8qG4jQBXLfLg9s1rFprQydkzifGUHh+0uWlsZk862OGiLAZHXj1qJKPUmc7nkvxC+JelsZ7fTnV3xkoeqEVzyjKWxCbUbs8k8Qa5NfmSUsTufIAHSrjTimYNyk7HN3F1qFrcLc2infA+4g9xV+1VGSkdOHpvn1O58MfFvSLOz+03zhJwoJV+xr6DA4pYj3Voe1TqQgtTkviP8Vr7x3fCxsm224b5sGsM5rQw9BtPU46lf20uVGHLEj2/lEDgdK/P1U/eOTOunSbjqYOr6GTF50PJHUCulV19o5qzlFaIxgdhKsOR1BrSLjucEm7M/rZ0iSDWJ7h3cYUEBW9uwr7ed5TZ42ITVR+pv6ZBPOVaa1ICjCqjdB71zpamUbNnQWWfKADbju42vyKbvsaI1rWa4LiWOY4x0JqoppBOEHGzRc8yGxjF0YNzucbV6k0SqKnE5Pfm+S+iJuZ3EroSc8DPT61zybk9RxXIrElzI+0IelbRUrGD5egzDKuS2OOtdKglG5ytqUh8IDKCkxPPJpp3jeMiuVLdFLxk6tZpFIPfBP61lVm3JI7cBBRTkcLrDqcLCWZVU4LNwzd+lYz96DO9u8bI8s8TXXivwzczT6ReuGus+ZCWzGfQe2K86UJU9Yvc76dGnVV30MS48QXGkW/23UxMmAQxEZIJ9ff2qZVY01qinFSVjh9f8V6VLei/1vV4vJ8zeUuDxGB3YHqa4ZVYOV7nSpWjyxRTb4q+HfiH4tfRfCmoJKbNAsdnbqBjtlznn14rX2kK8kodDOScYptNF3XYHkke51fVbbbbx7ZIcYSI/h1PNX7J9WKFNX0MC7vvDhWZ1glmeb92HBOfdscYH061N1TXKlctScZanj3xF17xHDqTx6T49vrVNPcTQRWKlQrr0b5hznuDXnSU5S53JpI3U0k7Lc8g8XftgeLdSudesfHnh97y58mOXS9R0uLarzI33ZlPvg5FYV8TJuUmr3K+rNRTgc94O+Dl/wCMLm58R/ECw8691OAzby26JiR9wBugow1B1I3qIcZpK0TWX4N614MtLi00uyFnFCFZbWIfLjrk46fypxo+zl7uiOtOMYjLzw9PpEhl1p4UiNsWd50JhIwSC39081cvPQ5nJt6HgGv+HtYl1y68QJq7XNnPIQscc/mRJ/8AW965V71RtO51UG3G1jK1EJY2kqRxHmPdBnkY7rmuukrHZokYMlzNewlHnLHAJYZzx2Oa6I3UrXCne5NZApMYmTfHjJIHT3FejSlHmsjX2Tb0LJ+IOj+BvG3/AAhXiu8T/hGfGlmNN1tM58ok/ubkDs0Um1h7ZHeuDPcPyU41brucsa8KNflmtzz7xN4Q1Xwb4hvvCWuptu9NuGinA6Ng8MPUEYIPoa+Z54z1RtUTT1Pb/wBjT9nu08Za2njjxVamSytWykTDAOB1561+p8B8NTxNWGMqw5o3+5WevnrZfO/Q7cvwP1qXPPZbH2d4b+IGnf2va+HLfT40ggUIsEacqvTk1+z4ilCqnTS6H0MKEaStFbHpP9k3VtceRo1q7ySIDFGF6DHJrhg6eHgoXtbQcqkIRTmyLQNG1GJ3kv4cFv8AWgH9K9GMYcqkjspzg46FvWZLSS2Vb5EJC48lm+VVreDlb3TWmhV13+xbZYLG3SJcBoyp9ORmoqUZVrqQSSUThdej8b67ql5qKeJbyGa9XZKDN8vl/wA8/Svk63Cc8di1VqVGorojzv7OnVrc/NaK6G74bF1omippM9w9zLGoAmuCXdR9T0Ht3r63D4KhgqahSPSiorRGfdvqElxPLZnaka8s6nOT3xXdBK12bqLbKWoNDcLHNc6Uk8akFFmHGQeTj+tNxbWhtT59rmfrk9lp6p9m1oS3czhYrO0TKs7HCrkjrk1lUnGhRdWtK0I6s0cFGV7adfI1/wBqnxEPhZ8OfDn7HHhfXo4/FOpzrq3i+S0YNIrNjcjZ5AVcKPp+f8mZHUxvid40yzKlKUKOBfuSS+3FrueZhubFOpmMm0pe7TX93v8AMx9LtrbQ9Gt9C0URx20EIVQxxg46H0zX9eUMNDDU/Zw2SNqOHSld6mb+1JHpN34X8OeJdNXd9t0GGK+uGDs0lxCWjcBm+8FUIOMj3r8T8OcTCHF+eYGD2qxl98VsRTUqbqwmtU9PR6nytrmoGyubqQRM1q7Yllk4IH09a/dIQSiOE5cq5tzjNR1hYLi6bQoPuoTaSgA9R2Hb3reEk1YKs7o4bTr+dri9ZtLglhhsytxK4Pzuc8fUVnyRs2zi5KrfNJnA+M9e0vVtXOoG2EsMEQS5nQ/dcnhR615lSalUMakoqGqPN/Ed0ni7xwugRgXN1HGAzeT9wE8EccYrgjS9vXcO2p506lPF13R6x1L+oCPQ9QW1vX3GHS8Ro7gjecYxjryfzrslBQlYbnKnXUWw8QNc317fRoiwrLpoZozyGfaM/Q1lWd4NHXiVejyLdnHR6LLYQDz5kMtxPsnlUcqhGV49/wClccaFo3W7PNw+CjQp80nqyjq9qxe4+1b2miXYXK8Od2On0rWalJNzeoV1GcG5aln4Z6zaeGvEaaNqEyppeqsI2L8rbSnG1/pk4NfNV6Lo1r3919Dz8BiZ4XFexb9yf4M9K0We78NeLYZ9PRLTUrSaSIMyApiRSjKyHqjKzZHTmuTHUaeLw7p1Ntz6OvQhWhKlU6qx4NqHh1tI12+0C7hAuLC6eNgSQBhuMcfdxWdPlrUk+qPiKdOilKm170WOW1mhZ4rkghk+UP8Ax040+X3WFN11JxlsS2SBCCBwchJB/KlCmpyu9kaRhUjueq/s2i/n0T4k6HFO8dvP4GkuLqFYwxlEMyHBzzj5s8EdB2r8748jCGNy6s1qqqS8rnM8FUrV1Lmtbp330f56dV2Mj4cW++1SKXqcEbWIJI5/Cv0hzUocu3o7fkfR4Gl7KNz3z9hj4g6toVp4kfwxdTWl9pfioTW80b8jIBxnPQ46V8J7H23FsuqcTzJTjLEziu5+737IXxstfjP8FdL8XXjhr4W4S9UdpAMGscXhZYfENPY+bx65cS0jvb+5i89XVip6lQelYIwhe2pR1XUHYeYoPHU+tSxcnv3Nnwff6i+nMDNtBHGaUJDm1HRG5pgmBL+ZwOcmqSuZrYt6fc3M918xz2JIppJE7PU1NSu7u10+WawhV5Y4yY489Tis5yaj7pfIqkrNnFfCHxB8WfG93eXfj/w/HpsUNyyWsaSlt8YPDHjjPpXPRlXaftFY3nRpUfhdz0WZo7aLYDk98GtrmPOm7GdfzCOHEUZ3N1FNaPUtJGffWMepaPe2Nwo23NlNCy+u5CP60+W+
ncFNQ97sfnz+xDq1nafDDQ9NdJZJI7MWu2Lna8ZKnPHXK1ph6UlDU7q1WdazaPoPQbsPeCN5SAWOCTzmt/dWhEYOx6n4BlE9xEdg+/yCfve9UpWd0c1V3i7nnd/o/wAObrxN4i+JXiLwxg+GfEiw3GrLqzSvdS+QJFszbjCxx/vAc4JYjr0x8NxDnGN9rPCUoPW1pLV69Ldj3MK3h1HlqX5o35bba737nlXw68Uan4m8KeJ/EHiO2W4v9X12a6SO5UqNgwqqQcHbtAWvCwGW4unndJ0/ehFe9dbt7nVN/WI3et317Gx4T0xYNNtVtVaOJH329vJKZPsse7AiBJJAUHaBngV+jzvKba0OKu4qTSjZdhnjrxENN0K6+ySGIvFIQ7NgALk/4Vx1bdCIOMrmZ/wT7uZLU+I51iKvAbQzHcSZWYSSMef9+uF0lLNJNfyodSSeEgl3Z9Ba/Ja3AkeA7opF34Xqp713OKTsjOGhzlzqeoaNbJqNlL5gX7y/3x7+9Q24O6MakU3qGpXqfE/SUutPVWMI2tGnDKfTiq+sRqRsJK0tTzzxFqN58ONQl1CeLCyqTcKTyvGMmsruDvEc3GS0POb7x7onjyT7Dp2pw3CwZeeQt1xU3UpWZmk46PUwr/QNHstRuY/C1wLe5uIgbiTdxjrj8q2pQhF6GzcfQ8z+IHiR/CZFtb6gt59pjMcQByR6mh1OxzznzSsjxzxFpviS5v0EfiSdZWl3JkEBB/dqIRbbuKnKXNuc/qdz44t3utMv9XJVvmTjofeuhXgtBuD5rtnnXiK28TAyvd63K1zu3Bg3UVi5p7mdRJHFatbvNLJK5Pnj7zf3ql1Eloc9p21OW1gm1ZmcYzyAaIyNIRUUUNIna+lmMij5hjGarERTpnZhnzVNCfVfCNre27POuCqZLDiscNUqUnozuqwjJaoxILK3sMxwhTz1HeuPH4irWk+Z3FRoU1JND9zMmBXlxUUzslZIikcbNhWrabORpSepha3pClGniHI5qozl8LOWdC+x/WnYWEGnwjysrJIRggZJFfoc17zPnKzbqv1NjTLaK2JmIf5u27PNc2zMoq89DaiaAooCNG55x61ad9S2aEFzHbwoxTcz8Ih6k0TkoxDyLdhBqVq3mXZV2c5xkAKK5kpJ3MZqlNaMvw7MbxFgntn+tbxjfVo56kmla45wd5YqcAc1aqpOxjytq5C06ynEkZC/zpSqKro1oTGk4a9SW2uICREkR46cVUKtFPlii5U6jjdsw/iHIGEaGQjAzgd6zqe/UudeCVqTOSluIsbd7ow4YkDn25pNt6HU99DH1zSbC4mhWO2ZgjbpAU4J61zypu5tSlyJnP8Ai600gqzagqA7PkCgFUHbj1rKpGnfUuEubRnmXjfwH4a1zTJLu9s0jQPgLjlyecn1rjlRhe9jppqUJXvofP3jr4U3J1lrjw/NNb3QuNlk9hIYpWJOMll6GuPEU1Jrk0fkejGpTcbbrzPQPAv7OHjfwboi6v8AFX4g6prtzLyLS7vMrbrjhcAfMenX3rtoYV04XqSbZjVxHNNKnFIwfF/hmyke4g0HVNZa7CYmS2QlYUHJ2nHYDrXNiY03rdoXJWmvhVjyB/g/4h8Zakvn+ONXubOJ2LW87rGAOeGKjk8dM15vsufVSbQ1JQVrakLfBzQbHw3/AGzqEMa+d50kQbnMaL1/PFdNKEVA2p1L1OVmp8C9QS08JW+leK4P9IW3kWxdk4dHztJz6HFdVB+7qOtZSvFFH4o+M9N+Ht7caxrs0iWu9re/iiTcy71GGAHXDZNY1uWjLVEcs3Gx5T8QNT1LxxYyaJNf2t1FboUhurNwxnhPIEgz6flXJUhKcrSOmnBwR49a+DLPwrKYbWOSzidmBiEg498Hgj2qYUYUdUdkVaN2YGuaZNZLc2ryJcRySb1+zPxn+8B29xVOpLmCE3N6HMTWyQ7k2BZMBj83DCuim3I3s4q6LFgR5yqqcA9Cfzr0aXKrK+ppGT5XcrfEL4W6Trm3xL4p8beENJ06Q+WEuy0+pyEdSkSZK+xbANcuMneo4tq34nlV4OpWUrljTYLT44+NNHtLZpJJrO0isbi6kTD3ccXyxyOOzbMD8BXNkOTyzPNI0Vqr6nq1FHEVYQifa3w08LaP4I0O38M6dAu5YhvG3viv6ey/BUsswSo0ktEfWYWgqFNJHRWVtB4a1BLuK0Tz3wBnoK9KjTVRK9lfft/XyOipG6bR3l/L4judJgv7TVJLOWQYYw9QPT2r56pgOfFtvYwjQpzl7yui1Z3V3oWkqzSs6sQ8js5y/rmvVcYqNl0OqMYr3Ymhd/2Xqmnx65e7ogQf3W3JY9q5oV5QnboEJyb5YlSSwkaMyz6XdhWG6APH2Hc1vQrUatSXJU5n20djeXK7K6fcqanqOk6bYRXuozw2kXLBpWwTj1rsjCdVWKUVZ8pn6Rqtxr0cmoWUq5ZTtkIxhfX2pz5absR7JU5Ixbi81RPMgtLsSx44LDJds10Q5XC7OuN3K7M26u767mkeW6aIbcFs4Bx7VWjVtjWMlCZ2X7MegnX/AI2aLNeCE6f4dhl1jUWkGdyxLmPdng5cr+Vfh/jzxXPhHw8r1KFTlrVfdhfv5Hm5nUbwVSMb81S0V89/wPHz4i/4W98avGHx9vLkq+r6lJBYXbwBWEETlcKD1DEEj2NT4C8KYrLPDmn9am4YjEXqSmklK8rdWn26pryPRjQp0IU6UVdQSj9xt6vqR+y7LcmNCw+SXjcc8Mea/dsS/Y4apUk9Ipt/JFwcYzSO8/bLtdPi+C/hKGHxfpGrTeFY4rC/j0iBYYtNM8Xm+TIAfnmJwxbjIYcV/HXhdxfVxXibiXUSUcQpcrSt8Mml6vTVniZfSUliavs5Rc5X953vbS67LyPh3x/BDLDfOZyYyCYwRxnHGK/sGlzSpyble/4f15nSppRseR6B4ovbaO6uWmc3fmGMebFtUjphR/Wqi7QS7GMXyO8kZ2vXc9rodxp9qQr+ZmW4xwxPJ/D+dPnTj7wV60XTstzyOLSNZsEu5J7kGQzM6pKMIsh+6qjuf5V59WDk27nk0qFWUnKTG+GvDN94Lvp9duGW51O5RjcysASpxwo96dGHsE5dSo4aGGk5rWT3Obu5oT4ktri9YSPbptUOOsjEHB9xXBKq1X5medaP1yMpdDf8R61ZXlw8QsookMbSQIFwJl6ud31H6Vu3dHrVqsVC5zs17BfX2ovHEuJniRYyuSgGByPpnmlTqRjJo89V3UvfoYmowQsbmeQyYVgwc8fvVHzj8ea56uJp3k2yZyg4v+tTkmimvZJUuN+CCEUN2HINeK/aV230PnnTqSquUj034a+LbXxHYrpGsXL/ANuWaqqzO5P2yAABcZ/jUAfUe+a8r2FdSkpao9nA4qeIfs5fEip8fvBk1nrFp8RrNJmivEWDUWdOBOq/KT6ZXHPtXmUavsq7hc5M3y+pRxSxSWkt/U4d4orlGRZi+QFUA/db0r04yVV2TOdLTQq2sM0jG3lVo2Unhjgmt3NU42e5th4VJP39D1j
9luG3XV/HGp39xcQWVr8OdSa9mtPvgMEVFJPGGcqv41+b8e11OGDhFJylXhyp+t39yMZTdSo7dFf8TD8HahFZ6T/a14oAiUmMnBOcdSK/QVSpVmrr4dfR2Omnip8j5Nj0r9i/xQli3iPzp12vqMUkmF7kf/Wr5qpThhOLaS/mgzgwlGTxEm+rP1Q/4Jg/tA2XhrxfP8MdRvDFZantktJGPy+YeqjPSunOcPKbdRLREZhls6rdS599aiGgcOhB6fMR1r5ByufPPlSsUJbs3kqIEyC2DgUJajWx0mlultb+QyhSFHJFbwSSMJJ3NXTXldstwCOMVErtidS2xpW08NjG0siDg8ZpO/KKylZP1DQ9YfV55HH3RwDXPGTbNWrGr9pFpBsRR747mt0roJ3aGiZ7ltwx78VLdjOMU5X6lS8uA8uwbiAOSKlfEbOzK9/dpboiJwpcA56n2q3daoqMb7n5o/s13V54c8Q+LvC9hqRhm0PxvrFonzkBY0u5ePrtIrXD1G6Tv3PaqwjGEVboe2/BrxjceIbY3V1cRmaHUZoOufungn3xShK7OTEyUFaJ9E/Du6jM8JJDFk4yvb1FdEVY8itds5H4oSWlleyW+gaFBHLf3Uc+pFY8fapEBCyuO7BcAZB4FebPCUqmI9py+8z0cMqkYat26HnWoWN3dvNNDpyTgxMRHDhJF2/MSB3FTG3M7npxfJFal7Rzb3ly0aQPbK1sj4cYIbg+nQ/1roSlu3c5asalbRPZ/wBf5Hk/xW8URXWiSRG6VY4pJYpDnGCD3/KuNSTu2bVLRXKlqdd/wT11a1N94wWWyWDN1aRSRE5Ab7Pnj881zx5VmEvQqWHdLBRb3uz2xJLjwhrs/wDarJJYTn9zJ2UnsfSuybS2OKVRTXuvVbmZ4s05iXvtKdktZD+8AIxg9xWL5uRtLUqm1OSU9jJuNY8EfB3wPfeI/DF3d6hqF66mSZ50Ecch6gDPBr5KjjcdWzN0mrI9XGYShRw3Mnp0PGD4o1zxBFLceJbhnuLxHZlbkIuf8K+ppx5VZnjtJrQ5/wAe+A/DZ0SC+0BhZ3U0m3zITtLHPcd61lShJXHFWvoec+JLPx74cuZb0auPKwIljOBvXuWNVyKMdGRU99HH3MWqX+sSX94RM0PEZCghM9SDXO0rmcYnP6vo8k9/9hm1IkQnc0wPG6tYxbRajGK1OV1+K0m1F4Irl/tIXLSHow9KbT6hKV9EcF4iuNIjuJIZnxG4OHbjYwrCSTehm5Jbnl3ifxDYW88itIrSITwP4qXs5JamM5pPQ4XVdUur+QvMeSeFx2rWNJJ6mSU5kWhXa2t+YGYAOeTVziuTQ3wNRU61mbfifWRBp/kx8NIMYBrjvKMXI9mpzX0OWDbW2ntXlt892wcrLQVb+3U+UT83riuWVKSlcFVdRWIZpFJ3KfrWsWr2KUJWK904e2dPUGhJKqmPlkz+sq0g12C8WR9PhNsvLv5g3D8PpX31epKNRp7HyFZp1Glvc17azRhJNYXrum3dgLioS57NMmMZKOpaQeWIznzGxyXyCKtWi7MlX1uTaXfTalqBuUtmMFv8sJUck9zWM25O5pCKtdnQ28eJPMaVyc8B26UQV2ZTkmrWRqWyxC3M8p2qoy2a6XZQuzyqjlz2RBFczzKZmtiqs3yAnkj1rjhKb962h1ezgklfXqEzKDvZcexrWM0tzFQbe4kE80vyxJgbutaRlzfCjRw5fiZzfxKlaFgGBAC9RWjVmdGGTdPQ5SCWGRDPcW+4RRkqrA4LdiaxlJROu6iZk0fiLxFqn2KDd5SriZ0wFHtzXDJVZTv0LfJY4/xytxYXL29tFvByryuvIx6DvWE072RVFpq7ONvtUslmj0OzZvtRQ485MhSRyxzUXa91HS4ycXJ7Gp8GvAmiXPjg6i8ouotMjzKWiBEkzdCPXFdGHpw5+boZ1JVHTsdj470q813Uf7Js3TzJTlsR8Rr7+9Ks5Tk4xNKc4QhdnAfEDwPouk6a9hFIbi4l+Rmh/j/2R/jWNSnzRUWXGrKT0R5F498D6foHh94LCaOByhSYK3C7uo68muOrQhTp8tzaLcp3PJvFPiO98T+FNP0W2eC0nvr6Sy02ytjvYWkWPMlb0yePxFccXKUVGJqouFRnTa14Z0fR/B9pYX8ZDyoqQSltrqwyNvPQ5xx716E0oRSKhJp6nEfFLw9aPomoi6tRM32UxmRxz5u0ldwPQ4FclWKBXUz5A8Dabc6Rqt3caXcvBcyXLtcQkkLLzyMdjXA7qo7HqRilqze1bXdOcSWl9ZyPEg2tkENG3vXRKVoWY276I4TxPYRTXHnWc3mxnpJna/8Aj+dc8Vcqyic/dWRMpDtIqDpuwxB+orvo6I1vdXDTTtuQx5IOMkGvRopykmy1JtWOI+KtpFN47urlbdFc7cSLIDkY/SvKzBRjinLqYSpJSuz3X9gb4e3Wo+I5vF13A32aBflZl7+1fe+GuAq4jMJYiS91dT0Mtpe1xKl0PpDV/Fw0jWVFxMEDtiIbDub2r9vlJQk3J3XofTynCE7M9T8KWmmXmhJ4k8QwhHUAxQtwT781p7VydobGrm5L3VfU1F1KeTTZJ1A253Jz2ry5Tn9alTcdEk7/AH6FTWtkaEl4upaR/as8PmxeTsxj5Fb1JqcTi8PgqXNVlZGEEqMruRz+t+Pm0yxt28CmDWNQkJR7eQlYrUf3s45r8xx+N4j4jxjwuApuFHrLy8jgxEsRi5Onh9PMj0aHxjIG1Hxb4ukvrlskIPkihH93Ar7nhrhSlw8pOVWVST6y/wArs68vwUsIr1ZNsqanYReLNQRdSaJ4oyFEKjOTX1spuEdD1/a8tPlSG+NdYt/C+ktp1tJGJDjzCAQCOw9/pXNTvOd2Rq5Jswb28h0+3guZrpvOePdJEuFEantivQo+8tDdNuyRnXniRL2/aKztERvLCxktuC5HA9zVyjaDtuauFtzrPAPj258DfDD4jXuiFpNbu9MtNOs3RCzr527cxx/q0GAcnjiv48+kbgsVxTxnkHD0P4cp88vOzWhz1abr1acXtFt/M81sGs/COkWmhaY4keCFUhZuQGA5P49c1/X2X4F4DK44fD2ThGyvtorL5Hpuneau9DY+GmlN45+JPh3w9dAS/wBoazBE5c7QytIuQB2HX614HiLmdXJvDvMcbtOFGTuu/Kzlxk/q9GpUj9lNnX/Erx9r37QniT9qD4K3+haBbN8M2sJNAi0S3WKZ7dIwxkucEmR8kgNgYGBX8JcHZfheHMDw1xJRk28ROSqc0rr3paaHz2VYpYWNOnKbftG93fVpPTtqfEGqeKFW0S01yNA0qjyNy/I2Ofzr/QvDYmKgrvfbsd1VckrSOB8YaTP4r1AxwAWyJxGkceCR1ZuOgrsc4yIqc1S1jhPF/imezS8iktndIlXbvPHy8Aj1xWFSpGKOSspRVzjx4ltr2KK61FRJbxQsYpFPzNITyfrz+FcUaic/IijLrLYoeKfFkIvbqHz4obxAkkUEZ+VQM/NnucVjiKnNLl2M6+I10OHs9UXU/F
4WKcOpk82aUngFuMn8K4aShUr2T0R4kKyxOMsuh0F1ZWN/JPci8eOCCJIsMf8AVxMcbwfXqce9d9Wzi0j3XTjOkZ9hb2lm90qyss+1trbv4kG7cfqOlYrlirHJUjGjflOanE1oGiurgNFI4mbuIXz3+o/nXkVYtVGjyVGoptN6MqamsFvcTXOxFXcPKfPGcj+YFCaorU7JU4RTbRV+2QQyhoN6qZQ1tIj4KMvbNYfWk24W0Z5bnyV24m+vxd8Uaxoc2i+ItVOo2k6LHcQSjL8DCurHkEYFeTicuwsn7ZaO52PMKtSg4VHdPoctM89uGSPDoxxHPnG4eh5/zitsPFKLuebGFWn73Qcl2pzIsilguS5PX25qakFLqb1KsqiTj0Pa/DdvpXwt/ZD1C3ubyAeKvirIJY7VnAe10K0kIViD/wA97gHA4JWH3r8czKtiM843p8sf9nwn2v5qkt//AAFfmckI1E7SW6ueXaJeqsBhMWwgEMB0U+9fsODSgufvqejhH7lrHYfs638tkPErmQBlltiGX6sK+QzetJcWYNN68sxYapCNeUfM+s/gfrX7ROv/AGbV/wBmqaxvNe0xlkfR70gfaQvPynqDXuYv2sqTS0NMdXgqbaR+xP7NfxK8ZfFH4G6J4q+JHhWXRfEBtFTVtMuCN0MoGGGR1Ge9fIYmnCnPQ+IqRakzudCRQ7XEyDG75ciuTdhzWibojkuZA8fAHXFbJ2ISctS1Jqi6TZvfXcgWGFSWZj2qZzSVxOBT+GnxP0f4o21xdaM4lt4pWjEingkHBrClWVZXRrycu61R2dtNp+lWpEQC45LGq0itjKd5O6YlpqkWoxs9u+4Zx04pqV0U276ssiT7EgcnqOaLJoqyRUnvw0h2gZPUAdKnqN6mXqt87X8UCw5jRw0kj9Bz0rRRbWgN9j81/AN/Pp/7RXxl06NzGkfxR1MCRByiysGz6Y+b9a68HQjG9+56CdWVGM+tj0/4N6hpWmeNvGfhnTL8XMGneIwsd0zcsHhU5HbrU1HFVHFdBeynKnGU1a59RfDOfzBFCABlF2+qisveepxVlGKOS8X+I7DxBc3Wp2jTyJDezWccs0e0yCIlGcdPl3A4OOcZFc8oyWrOyil7JWZw2nSXH7y8eYAhiihTggdvzFVGMdzqcWoli9uZ5JJok8+4mS1JREYAiNFy3zHHIGMDrxx0ranCU2+Xo/I56tXDwtRmmlO+qT/NbPXTVPtseZeO9A0mysB4itp0uItSma5+zEkeW65DCRCMjJwR7ZrzsNKVbEVIzjy8r+89LE0qVOnFKV2/wOm/YFtm1Sy+IJnZY7l9btjbsuB8ywDA/LiojSi8ZORjiK1qMILoe8w6hBr9hL4c1+HaSCJVZeQexFVKSWhxu25x0Wo6l4M1FvCfie4EsEpIsblvusnoe2aypwbndv8Ar8i3G8Lo5Lxr8MvAdtqMutzy3MU8o3ACYmIsOjFelJ4elGpz21MJOtOPLfQ8o8Walq3hq1ubzXNNaNHJSK9hGUKZ6/7NXJcu5cVyK5laF4m0nxfbDUNI1WO6trKMCORGzl+/A6U4zi1ZMxlVtIx/iBa2OpWX9jJdFxJHvnkLfdP1qHdvcnnb3PK/EPgvU9Lae10nVJEjG1kTeTvz3qowi9bmqmzznWNM8X28d1M2qu+XxKm3tRzSg7IJtTOC1+y8cpem6/tJ96D5ABwy1M3KWphOJxWu2Gt3cMtze37M8hAkUHioTs9THllc4zWNLFtIzM+Tzv3NyDT53IFT97U5m9uIVkKRNuZehBraEWtzZ26GfcH7MDMzfMORzWt9Dgb5J3RfmvWvbSKWVskLjrXn45PlcUe7hputBNlCSQsSVNeQptROtwjE87+LnjbUfB91DPZsSCeVzXsZVh6eNUozPAzbM54JrkRY8A/GHTfEgFreSBJsY2k1WPymWGhzQ1R05Tm0cZ7stzsgq3WDG2VYda+arVJRvc9ty5Xc/rG1bwToPivUba41kXZNmd8SwXjxoxx/EFPzfjX6DiMPSr1nKZ8kqkqUm11N6Gw0/TrYPGxiUY/dZP3RTtGEbIzUpTVyD7R4l1m5RtFvbaG1DEXkc8JLsuONjZ4/GoftPskxhC95HQ6Yw01UtlYgr0Hrx1otcmbi7I1rZ5X5ON27nC9quOhEopIv6hdMbdLKI8nBkJ6Y9KVecpRUEcFKleq5vboJNeuArHC4HT1oc2lY3hRiroiv7pwihRhmHGTWUpsdKjHmfYn043DRglcDvmumjKpbRHPiOSMrXOW8dzSXVxIhXKquOR0rVyu9Tuox5KCscTf3v9mSSIlwQXGY1PQfhXPNqLNYQcrOSMPVPFMlpaS6fFe8uS0h2gZP19K5G5NPU6VHXY5TSdWm8671PWLoTGOPbbrIo2qe/FRRVpNsqUVJpLRFK50QeIJDLp0Ku7Lh1SPaxz1OewFOSckzSU4wjY6n4O6CNF0fU54f3gtSFjO0/KxzkZ9ff3rWlTtSvcxqyc5pIzPEXie/tJHs45gHugWlmAwFQds/0rGU1DTqXGmoxuzzX4h+Ozpk4LSkXMiFLeOM/Mq45OO1cdWraVludVJqWyPBPjn8Tri00W4vJbgpa2sZeXLYyB1JP6VyVJXvKR1U5JK1jzT9mzT59XVPiJdAmS4keSMSgnyk3Bti56ZHJxWeFSUuc2cdW2e0fGzUNJm8PpNYXZJuoVm3kFvLbdx05xwRx0rsxcrQ0MqUOaoeP+OPFGpatoMxvLgiLje55boRhgfvL2/GuRTco2NPZ3lofNsyXS69c2twiqY5N684LJnhvwrkfKpvU67vl1M/Xtcur3UGjku0yY8eepyJB6N/jSfvF0/huzkNTvbgXEllBb7nJyx3Y3e/1qI3T0N4r2hlMrHdv+WTbnZuGT/jXo029ik7KzLWmRGSdXdCrBs4Hau+m43T1NEklci8TfDe+1/4mWNtZxAJqcStNsYNjHBPPSvOq4Sti82jQjtK3QzkpSmktbn2J8J9E0DwR4StPDWkQrHHGo8x8cyPX9CcOUsJluEhhqS9X5n0GAoqilbc6+18GWWpavb6/qVuHCPlI8A8/SvrrQlF3PXnTjJqTN7xVo93riB7C4uF3MAsYPAA7YFYwkqcrLY6ZqLguVWOhuJRp3hyOyuQMiHBG07mP0rjnJTqt9CFHllzHH+NfiukWn2vww8Oam7yTHddQWqZcD3/ALor4THSrZ1mqwcY/u1uzxK37/Fezgm3+BZ0PTk8OaYloGEcwG5wZM/ma/QsDgqWAw0adNaI96jRVGCSXqYfxX8eT+EPDUbaba/aby8mWO3gjBJyT1ra0ZzSsKs5U9UbHhW21Gw0qJLmRxdTIGmeRjiPIyeaqpOLRtJJannXxw+IM/hO8j1RdPn1RYZhDZWVqhJnmJwCfYdc14mZZpDA+zppe9LoebmGJqUEnFXb6G8j6nqOkxz6woS4+zq8wfpGSM49yK+iw6tCy3aPXoqUaMXJGdcarY2JijspcTNxGVXLE/3iO3tWsYSjJXNpTjVgnY2vAWl2mhfArx18SNX8TLbXG
ea9fDUaWHoqEDyq9WpVqOUlYh1/XriRhbW8as75CITx7k+tVPXRGMIpO55V8cPDHgm30+PSbbWlsNc1SdIYVik3NKWYblCc7flycjAFcWJpU5RSTs2ddGpXqysk2kO8SeOYvhd4Ug8A/DqUadaWkOJHyDgj78smRySc8VK5sPFQTshTpWquUkfF/7S0PxQOrT/FT4Q3csN20jJ592mU1ORztAkXuMnj07VjbVyp9/vO+jycjvsuh0njjwlrHwQ+Gsmi/EDU5tY17UNM+267ff2jLDEJdu8wpCGKCMAlSuPnwNxOKqcPZxafU0oRc6ilHT1sfQGq/FbTvE/wAO9O1DwxNALVdMhe1iU/KWlQMDx171vVqxdJI5a1P96zwH42adaNcaTYqhnubO+t3uZJG+WRzKrN+QxXBOn7WSSYJ6WtqdZ+1f8MPDXi2Z4bpbZgbcMRJEMDEYbbn3Na4ihGMTOCbjex8Z/wDDNPw/0f47aPZaxpCJp3iBpNJuIpF+Tz/LMkLHt/CV59a4rSjPlNv3koNX0KnxM/Y/i0XUJ9M8GeMdb0m3ug0Qis9SlSNdpztChsL0HT1rqpQ5Lt9TelBRWx5RdeDf2kPg1qXleFvivcXttbXAkSLUIRcKhHAfLfNyOpz3rGvh6N/dZp7JX0N34HeHvij8QfF1/wCOfHfi4yXBzGqE7IEI5K4GcEnj0+lYxjZ2ZXtJU1ypnY+NfhLZNq7apo+oB5I4Fa4tsYKHPp/EPQ06suxoqjktTzH4k+L/AA94C05m1uQsCXBtcfvDL2Vcdc1i02rEVKsYHhLaZ4g8da2niXxShSOAFbGzB+W3Q9Mjux7mtKT0sSqc6s7vYu694LeOwTVrK0IuLF/NQoeoHUfjzVOSXuo7YxjSVy34o1GG+8Jx3tnmQyqpCsOme2alKTJu6iujFTTtPvbFrDUId8MsRSSJ/Q9TRGk27l/FBqRg+Bry58Ma3dfD26nDpbjzLI+ZzLAT/MdK6q1ODSlCNjgpc1KpySZ116lte2rROoYEYJKgOP8A61ZRsjqaU0cdDILSWfQrwEmMloSTw6n0NN8zd2a0Yrl0MnVLmFoHtCxz1jc8YPpXdh5WlY0lFJmLLeRtZXFpcFseW2dvXp+te1hFL2yZUPj1PnfXro2uuT/Yjty5yWHPWvrWvZSufP5jWjRqNQRRW423AaNzuPVu9awrO1jghKWrTG3dyxmV5JCw3etZKKlUu2c1GtKNe8mddpfiGGO0t5L/AJii/wBXGo6V7EJUqMbn08KtJuMpO1zU8RajFqkCahFDhQMYccCtKtWTp3T0PaxMVPDpxeiMfTNbfUZ/Jis3lVRhpZBhV/CsaVdvRRPKwuNfNZRbS0uXNZuILuNNOhS3McDFhLDFtZ8+ppVIznfmOuXtK2sXoZOj3kbatPNJGMQpxkd64sG3PESnfRHNQrv6xOb+ygM5u9ZtoJl3OZN5XsB711TpQq14J+p537zGY+EZPrc6u8124ljBC7MDAjzzj1Ne1HEzUEorl9f+AfbPEumuRHP6prN3qUo0bSn+0SyDD9wv1ryMTip1/wBxSvJnzOY5iq0/YUPek+2yLHgZTBBqPhe/TFzGvm20g9uorfI04e1wmI+Kzt8zy8trV41Z4Wq/eWqPpT9tS+n+PP7Onw4+L8HjSO6vodEg09NISw8tNPMGYpQJB8rlyEfBORzjiv5s8PqNThvjnG5NGlyqVST53K7lzax06W27H2uZYStm3D0KtNWmndNvps/xPlGfUdc8O3baZqaZYLyyZI/H0r+hKlfE5XiJUqz5vQ+KVXFZZVcMRr6Ha+BZY9O8PZSVC8x3OwOe/SvUy/38MpRe+p9FluJh9WVne5r609veRtBNCHhEQOxl65/lXs0aiqS5JbHdOrBx5ZK9zkb3whfwyfb/AAzdkMD8kRPQ56g9ulc+Iy+lL36D7q3TzPJr5biaf73Dy1Wtj7G8I/tCaJ+1N+zRJ+zh8SvCGl3EEMcT2fie5MtxrmnamAVUK5Y4t2IA2AYIftgV/MeYZPT4X4jqYvCrkm5XasknF9+56zjDPqXNKq4ytZxvon39T411t73wlpupaFqtgovra/MLhk5jkRip/Ov2TD5gllbqRXx2a8j5GvXnhMDUUo+/e33E3g1prOwADhZpGMjnH519BlFHlwt38T1Z2ZMp08NdvV6s1LuVLi1F55p3M5Cnuw559zk166SlC9z2VUc43E8N+KR4U1iLUzEvmiF4rZ3UZiaRNhkHuqsxB7HFfMcU01VyqOHvZOS07pHDiq3s3GPVkN9aRR61c2Tw+QokzEhGCo6rXRhlFvl7bGjklWafQr30NjJam2voANr4dHTgc5yD2/8Ar111XTlS9/8AIyrVack+dX+Ryt/oA1bVZU0JFh2r+7QN8rn0rwalJ1pyeH0t+J5E8HHEzbwytb8Ta+HFpceJNesvDrwMsiXAE8ZU/Io5Yn2wDTeJdLLqlWorOC19TDB46VWXsp6OJv8AjacS61cXVuQFZz+7A6DP6Vw8OwnSwSs9ZbndKUnC7Md/GlpFZDw5rZM1nI/RRlkY9xXdjMIqdqylZdfMqOLjTXJVe5mahoGoaNIJrV/tFlLysi+lFGulC6d0Yzpzg7xd0z7A+Bvj6y1jwBo3jbUvE6WN3Yz2Whs0tpJ9nbYhwrygbQdirx359K/IeM8phKnikrt3vZarr1/LuebjvYUm9Xd9EcH+0RL8JtT8B/EKTSo7u+uH1KxvrCbTNQ/0G3vFd4Zy8XRmZdw3DpiubhGhnKxGEcnanFSjK695pq8denfzPOp4etiHzy0a301fY+ePDk8jHEblRjBz0r9lhOMFyo+qy9UqdNXNKWAQyqyyAseoFRKpK77HZKdOnK6NK68F3d7p/wDa+k3XlzKPneOTkj6V506nPUOLFVqs7qOiPYvh5r0Nh4TtdP8AtILLGBISR1r5/Epe0dkedD2kyXUdGj10TXKNwgzzXOqMou7HKnd6oxNLEOg6gJoZgrKcsNwFCvJ2MpT6RLniDxfaufMkuoxxnG4c1u6fJG5DqxhE9F+Cf2XUdNh1AK8g8wkCOIkk5r4rM5J15N9T6TKavNTi77M9g+F0IPhy9lZMM94+ARyOe9fjPFdVrMLI/WuHo3wzky5Lbbp2DDBzycV4SqtI+iikQXFqXUk4JGAeOtaKq27lJJuxW/s2E/MY1bjuKPbyTNFGJC+hW5bPkLz7VbxMu5MqUZdB0egWvAa2AJFS8TN9TF4aF9jS0zQIRKv7gYBGB6GuariHy6s6KVGMXsdXpOk7sBuw649q8irXUXoejpyna+HdEnaxXy7aMoc5LMf5V+tcFu+Rp92z814hlzZlL5Fz/hFb+aTe1xbRxj5n80tvPsCD9K+uvC58/wAk76Esuh6a8uWhQxrgMQxPP064pN8uxUKd9y3oejeChqCt4i0q8ltf4hpU0ayk+3m5FcmJqYrlvRtc1jT10KY0SCK8uZZdKjW0EubNWlDSGPPBkxgA+uKujOq0nU3KqJLYgvtHtfOQXGmQ8rkCIcdO9dLqcxi276jR4c
0+cP5ekknHJXOMf41l7RoyaTdyF/CSRjEULwq3PLYDelVztlOEbalabwzPHiZ4Z1VD8xaTgnPX8qG+4WtEgXw7fRlnkgdNzfJumJIAoukiEhk2iXcSmUXCjP35POPfsatTuPUoTaLMg/1+Bj5185ifXNPmCXvRsV5RHIotGuUJyGUyTSDGB04OKE7PQcX7tmZ1xZAIZLhArEEbWkc5H51uncyejKkqSrF5VtY23JH7x4txzz361EldmU1cz3tdelTa8WcsD5kAIB9Rknj8qcHZWLV2tChdreNvke0WTAICyg5A9OoJ65reKizGSkyhNLrDRrLBbzDYh8qIXDLgenJI7elKUV0MpprYy77xZd3l4umXlvCs6q7GGfT8kqDwS7qqn2wfwqbxg9DnlH3bPU/SzVvE+n63OtmbkQjcu8IxwFHXJBPWv1CrKKm2mfHSvGbbPVvhFr8sXw+uLuOJUjknYREAnKjgYz7VyTjeLbMlUcqnkdXouo3z2CxSeXAijeVJ2hj7+prLVRO614lfVrm1uv8AkItLJ5nDxdFc+lcztzalUZOKvY6TwfqWlzunhrD29nI4Vo7SQhQ3oAFYsxHAAHUjmhxjOVugqsptcyWpe8VeI7TwU7aDplyEgMw2W8Uu7JPXe2BlhnB7cVNWpCl7qFSTa5jU1rxxc6FFBqOn3TIIVUyS7vvnr5YA5Oe/1qpTtqmZSlztqS90h+M/x+8N/Dv4f/8ACW210z3moosenRwxl5AzDnaq5JKjdn0IFOtiIQp8y1bOahQnUrcnY8s+BXhPx/4w1Y/H34nW9xp1ogceF9Hum/fzs2QbuUfw8ZCg88kms6NOok5z27HsaUabgma83h+b4reNX0ae/a10HTMza3dA8zMeViz6k043xVSz2OedRRVt7nKfH7V9BvtT8P6FpsNvY6XBr1nGn2mby42VZlJ3E8DOMZPHNKVSFKSj5hQpctNtkX7ZOnSfEjQbu8l8PSaY9tcNbwXEk243EeDwflGVA5Dc8HA4xW0+WeslsPD80dU7o8X+BvxJ8aQfCSaxh0m1lfwpO1pcWk85XdbqfMgZDzglTtye9c060JKyRdVRjPfVmVq/x40H4tapei10bVNOl09GluItTRY42nAQrGkgbD4OOnp+Fc0HzT1NadKUPeZ0uv8A7VXhjW9ZfTfF2mX+n6lPZR20tjqKj7PKMNGzRSdGY5BxnOK1qxhOV5N2tt0FytX0PIv2pxr3xOsrWb4fSTac2gzQX1ndsSS19EQVP+7uA47jNKPRroZ0qSqbo67wL8R7T42eA5vEt0r22qx3CJq9mzAGyvQoEi467Tjep7g96mFR12dEZwirM888eXGnzWrWbORqCHCHjDj+IZ/EfnUunaWrE5TlHYwvg00Utp4k0A2MCS2199psyxKO0ZVVmjyPRgGHuKykoqVkKnBv4iDxn4hSztTONTkMsbARSD72zP3Tj04rKcfeNrciPALpX+KnjTUda1tGb+yX+zws6bccZLnPUnpmlOhWpVOWomn2YQ5Kr0HBNHkD6fcuiSFgI2Y8Yzgg+nNXyWXunRG0FynM6r8UNN0ue78O2tnDqc6xkARTYVTjpuHfrVezsrsmo7ppHmPhn4m3c4n0O+09omtrwzx2gffvi/iQdOcHI+mKThUlK6ehx0KkuZq2h2+i3FjqNqJ7dxcK4LodnVfT2qlJuWh6Ckkcv8VvDt/ax2njjS023mly5+U43wn7wPtXRFOouQ4sTRlUaqLob+kaxF4m0uK+tpwzSxh0cEDHtXL1NYy5onP+InIn8yeMbo8jIHJHr7Vo2rG1OTUbHL65a/Zl+120izQuOcH7hrpw9Rc1jTnVzKYtKGWRwr4/duRweK9qjJ8ysZyqOMro8H8dQyHXbn7UgRvOOSgxmvp5qpPlbPncXWnVqONjAicQS5xwT3qIVOWWpMX7OOgT38SSgDGQelOVdp3PKqKSq3ZpaFqss14qyxpsToXGQvvit8PinXnboerhsdDm5Fsu52OkaoviGyksltFEEPAlK43GvapuM1ZrQ+my7GSxiacfdXUY/wBmija0hiCIFIO3Hze1dtP2UVpojsxcqUaaULGbdymxiYui+YT8qDqTXkZhiuSLV9Tx8RjPYUXFLU15Pg98WfD3w0h+LOvfDPXLXw7fXv2eDXbnTJI7SaU8iNJGADH6VxYGpRo4dtSTb31OHB4ilCjKHNee7V9TH0qw1HTtauZNZsJ7S7jVdtvdwGN1BGQdrYIyOa1weMWIrSqqSdtEPLKr+szrTeq0RHr9/NFHi3k/eyHaF9SavGYyThyRerKzbMq0o8lN6vQ09L02Dw/pi2aBWuJ13TysPmB9Aa9zLqdLBYVqXxS3Z6mX4Snl+Ba3nLVsj0T7ReeMINQtoCYbZGW7mA42kfrXDThVxOcQq0l7sU+Znk4ecq2dQrQXuR+Jn058C9b1n4gfsV+IfhbF4hvbzTtM1i4kn0KzsY2W1aVMw3s0zLuVFcbNoIGZe/b+b+NqGHyfxKhjo04xnUUXGpKTvKztKEY3s21re3T7/u8hdDHZfKkn7yU0te7utO68vn0PmvR1imgFxexrNJMpDs6A4r+lMJQVaKqTV3Neq2ufJYXlq0256t6Mm0HSbzw7cu9hOs1rIhLwN1QeorSngq2Blam/d7Dy/K8RgqzlGV4PoaN9rvnzCWNsK8JVV9cV6FGrFT0OueKjTqpIjfVFsY1i3H96nDDsSDXWlyUmk3r19TprYqrGKt1P0S/4IkaT8N3/AGevjP8AEzxT4T8C+Ir3SZLKCTSPEUXl3hgl2/v7afPyumxiFxyeMgE1/H30hc+x2UcV0MHRg5RxVCUFOzlySTTUlbaV0le+zas02jxadKX9p8zfxJPeyutz4s/a3tPBGs/tH+MX8JBv7Mn1QyQGQlmBIGc5759OPev27wswmMxnAWFWYK9Tl1fe2x7FXC0a0LT3PKri7OlSuYVzGdwD7cbTX38aEsO79DjlGphb3WlhDrcUdqoeUeUIsls8f55q3iKdGHNUegU6vLTvN+7a5Y06KS+8ISa9J8smpXf2e1Xji3iwzn/gTlOf9k181Cs82xsnvCOiMKUvrGGdbu7L0Qy/1F5reG/kuTLdDKlpG5YDp+QGK9ilRjFJrdFSi/ZqSepk3uo6tr12LaytGywAkY5xXDmGJnWfs6a1OGvVq16ns6a9S/8A2DLocy2upRGI7cghuvHXNZYaMqLSkd1JvCJJnefAbwddaxdeOvizazpHbeEPDMct1IvQyXFxHbovPBJ3t+Rr5Di/MIvEUMGnrXnbTtFOT/I8yNOOMzWUoK+mpw3inXWvJJpo2wSSdxPOOn8q+lwdWGEpJJ7I9LFzoUE7vYzfhxYHxT4pNja2r3Eqo0kaRwmRsKCWOACcAc+gxmvGzjN5zwM6UOrR8zRxMMRiW5/I7O68NeLNPuC+leE9TvLCY7R5NjI4B9sCvKwGZOlh/wB49D2I1ZxcUk2j6r/Zo0zQ2/Zd0D4ZePPAmsLayfEDUNZvBbeH57hpilqsUMU0YTcqZDEY65NfHZnmG
LxOMr0sPdxko7NLZ9G/Jnz+a5TmNbNoVsNTlLl6LRanD/tK/siftOeLLbwtofwy+FsutWMPg+Cylu9NthaIiLcSSpFKJdhaRA+0kg4AUAkAVvw5mlDBOvOvGUHKbdpO/RK6s3ZO3l3tdnsfU81fM40JXlvdnG+F/wDgmd+2lfbI5vhrYafuGSb/AF63XA9wrE19GuLMHCV1d/I76OBzenD+F+J3Ojf8Ek/2ib0LJ4j8deEtLTvtu5bhlOf9lAP1rLFcZUVC1ODZ0wyvM6jvKy+Z6L4Q/wCCVTaZCE8VfHkvlfmTS9Ixn15djXlvi2tJaU7HWsnxMvinb5HXaP8A8ExPgbYvm/8AHHi29zyViukhGfoFryq2fY2c+ZJI7aOQYRK8pNs7HQf2GP2dtD4i0DVbtX4IvdZkYH6gEVnUzvH1I6yOtZRl8X8N/VnQWf7JP7PFpJiL4QaW744eZWkx+JNefPH41u/OzaGW4CCt7JG/pvwD+F+hZudH+EeioEXDSrpcZA9yWFJ43GVo2c2aLBYSEdKa+427HQNEghUQaVbWsP8AD5EESj9BXG4ye7ZVOhQi9IpfI8Y/shNF1DWLERhR/acu3nrnmvyvim/9rNeR9vksfZ4axSdUKiQknkYPr9a8LVOx7lO1yFihztHOfyrVJ2NGhjRdSij39KXMhwsIts5IIUg+uetJyRrGSRYtrAO3XjHbtWU6iSNbNrQ1dMsyjD93yOBxXBWqXRUeVHQWaRW8e58Y9c150pObCVRROz8GSDWtETUIsRqsrog8/htpxX7hwjReHyGlGW+v5n5pmtV1swnI1HtIMEyx/KvcS9T/AIV9E3fY86/cqS6ho8c0VvcXEcc0pYwRySkNLgZOB3xTm2+hKlFEN5qFiUN3LYKU3cylhuX2qNXqNy6lNr1TAJIo3JXpiQtjnvxzQ07EpyZZe8cWlxaxzTxuJ41j8yIbZ1wS0mQcgA4ABwSc+lRFzTsnoyuVct2Vr6/lu7i2u725Dy2sskloYpJIxGzrtYlYyA/HQOCBngVoqEl719yHayRBNq8nyq8spHVj8oHr2NbKCMG7MrzapluUlZSMAvKQAf8ACm4qxfM3EqzzqQX3oN2eTJkjnuB1qLaEIhkuGRQn9oAN1Lxwk55/IVSso6kNtMq6hI4C/ap7lQ5VdyL94t9B0qJzildEyneNiFtCjuyrQu4ypyGcgY+vHb0oV0ydWipd+H7ZQXismcoSPMLOcfn2rdNpDUJPVFGewZkxa2isMn5AS3X6dPxqebqS9dCre6WLVC0+lNGqrl3LlQPrkgfjRHV3TGpcu43QpfDPiC5uvI1u1iFjArzyXd1tXB6LHhSZmP8AdjDEd8VM68qcuU2Si1cnvdO0g3QtVt7ySMx71kgsHKOCNwwxC889OCO4zWyqSa2JqRXKPTTreaDyxoWsybjyIhCvbp+8bIrnfPUla5ySclTeh9maHZSa3qVvJZ2qxWbuA8UTBjIeOSew4FfrMqb5rM/PKkp1Lvuer2etWPh3RRY2ibnjcCOLfje2OgHoK56snayNqNK7Oj0bU7u100XWohJJyMsrfdBPYD2rGU2o2Ou0djOn1a+1TUhDGfkiGZWx90egrjbfNoOMIwW50ei3F/YgandxCPaMxMgA8sD0H94+tVDmbu0bXi1oZEWs6D4r8Z2ugarfMhF7ETGl4sTmLDF2XIJlYEINi4Pz+1Y8sKtW0uhhUlOC02PQPGdiNP0Y6zrbiKIsY0EeMW45+XGTtJAP159K0qxtvsJOLfLE8dt/EOieI/GunfDnwLp6pJe3Bl1O/I3ypbKct8x+7uxjiuejSjOdoouUpR949J+JnxLOj6azW0bJDDbKkUW7BCgfKoHY131qkaUeUzoweIiqquk11/yML4b3eryeEIl1SCaP7Vei4ugsTNmR87AzYPAUEn0AJrJT5o2iiq1qHmfNX7ZfjJfHviDRPhH4W8Qiy1nWNdWwvdOkLCa2VH3STR4GCoRWyeNrLjncueaVOOIpyTkk10e716afPW2z62TKTqSd7aM9Y+Knjawl0bTtC0+RZdPtbOGC1t5nIMjAfMzd+eM10QTUeW5NNtXPG/FvjPw1+zr4N1/x14tu47e21KNo9Rl2k7EVgqMVHoc8dcVqqcVsNt813ujI8DWun+LdAvNV8L6pbarYNeG4eWJiyPC+0Eg44bbvOOoOM4zXFKE0/d1OyNVTSdjZ8d+D/CXivwZfeFNUPmXNrAJrK6Y/Og69c/wtjmtadmrMbc07o3/hTDo/jr4AWGpPBCdUimay1BlcMrSRKwbj/aA3D6GrbpqNhydnseEeMp5/g18SZviJaRlrfVI1h1a2jJRZFVsLLjn5lGRn0NcyrU6Sate+39ehnKmk73OA+P8A471HxBqdp4U+D1tHLr+ou81m7fNBZWvBa4kI/hGcKDyzfTNRKsqjBYiKkoJHmOip8Tvg0iQ6d4zn1aRJnuGuNVG8zyOf3gyOg+UcdAMelZRgnWuzodJqGjKusfG3xz8RpGsrPwy1nfPf+XdzS3GYkYjJYAcnrkCtq75k5dRr95CxyvifUIfg1p8eoWt7JIivIJbUnLag247s+ueeawo3nuKNJUYu+xzup+L/ABJ8Q7E32iWT6ZZXEoMhkbMhP932HStXJ0Z3SujOM51tUQWfhfTPD+hTagbiO3MT7pGY4J9WJrnlN3vc29q3HVHBaek/ir4g3uq6baNDZPEn2SQj/WMv8Y/Q1sm2kcc1ed0tzsNO1rxB4RuEGp2H2i0Ay01lw4PdmXoffFJQcVoaL2ravsdDpHjrwj4qsZba31COWO4Upg5wp6FSDyD7VPtGnytG0MRGqnFHGeHWu/B/iW48FXbjy9xlsTu4dDztFWuWSuiYwdPdmtrmoWl/E8T71P8AEGXlTj+XvQ11R07RZwd5MYriSHzNrfxRj7rL6iuilH3rmNNyk9SlLcbI2Ct8uDhhXuUF70SnBylY8R8WSSXWt3TPeeaqykBmHIr6WviFCPLE8etKNOtKzvYp6V4a1vxHMbXQ9Eubx1XJFvAWwPXgV5FfGU8N/FdjhUpVJaK5Ss9AutQup4obKdhaqXugkZJjAPOfStaNSOJaXQ5ZR+tVOSKem5JZ6lBFKI7KyAiJwzvyTXr061LBNKKudFGpSoVPcjdeZ3Gn3F3e2BTSNJnmZIDJJBZwlyqDq5x0HvXbUx9GlRU6suVPbzPqJZnQw+FUrKK7H058Kv8Agnb4S1P9mIftKfHv4wXWlT6sw/4RjwToFmGurlMZ86eaT5Yk6DABJr82zrxHw9GU6GFa54y5bP8AF6f5n57jeKZ18S6VN7M9x/ZQ/ZK8DfDLwLa67o3gjTLrxnqAYrqeu2wvDaxN0Kq42q+OhAzmvyHOeMc9zbGOEajUNrLS54eNz/FYiuoRlyxXbr8zt/Cn7PHjPxR4lttS8fzya/c2E73FgviEk2GmIhBUiJvkXAHYZya4v7UzOvQdCnJwVtXe3r1PMeYOg7Qdm92t2fL3xe/Yd/bA/a0/aT8UfGHUvEukDS73UhGninX79ILdokAW
NVCknAUYAx2r9MyzivJuG8ppUlNuSjstW2ffLF4ChRpzjXTbitLNu55V+1d+x/4R/ZTn0HU5f2pvBfjjUr+RlvdE8OibztPYD7zl1Clc8dR9K9/hPi6XEePcp4acIx6yVk/Q6MtxtPEY2FWrFqKfVWPIr69k1S/TR9PGWmY5frsXuc1+sRjVxNT2cXv+R9TXq1MZV+r0ftdfI2NQ1Gw0nTotI0uELDD98EfM7HqSe9e/GVDC0uSG3U7pqhhqHsKS0W/n5nu3/BOy++IfiHW/iR8L/Ayxmy17wct5ryTX7QKlpaTxyySAKp8xgDkKcDvkYr+ePHCjktCtl2ZYhe9Co4wtG/vTVknqrJ919zFkFb6tmkVCCnzNbu1k7ptaO7120v3R4OYIdP1jUdJSQGK01KeJGXuA7AGv23h3FxnkdGpPdxX3kUaKo1asX0k/zL2kXA+1uJmyu0Dt8wrujWlXm4neq2iRmeJbf7Nr8ZtISFlJMYHasK0Xh6sXfc8/FYVU8TGQ++bfHFp+cux5IXlV7ms8Ti5yapQb1Lr14tKl3Pbv2UPBOgfFnS/iZ8ErG7ube7vvAUuq+HLyKYxM17YSJNtYDlg0ZkGP9kHtX5P4y4yOTSyjNIJSpQq+yndK/LUur+qbXXbQxzKaw0ISo3cdm35o8u8dJo1j4purbRr53too4UaWUgNJIIl8wkZOMvuPWv1PhyrTp5VT5dI2v231PUiqare5K8bLfTp8zlrieK+V7RD5oPPloCxP5V6+KxlKFF3krHHi69NpweppaD8EPiL4ohii07wFrd7bbWEUVtpkrlzn+LC8DNfA4/G4Wo7VKyUeiujylgatVe+3yrodpH+zB+1L4i0/T9M0L9mjxgRY2IiUDQpY1J3ElssAD161pl2aZNgaFvbK78xxlV5I04U5aeRteHf+CcH7cHia+gnf4FT2ESMSTqmq2tvkdOQ0mf0rmxvGuU0q0Wqidu3UqrhM0qVYNU2kvM9J8P8A/BI/9qZ42m17WPCekRAAuDqjTFQemfLQ/wA68LEcf4VtypU219x6FLAY2V9ErnTQ/wDBIHxTqqJD4m/aJ02EHomnaLLM4HsWK+vpXm1+Oa04/u6f4hDI8bWnapOyPS/Bv/BOv4d+DPgvrPwJT4l65PpfiPU7a+8RX1tYQwXV61vu8mLzXDhI0Ls21Rkk5J4FfKVs0li84p5jWhedNNRV3Zc279Wejh+HsPhouKk7vd7P0JvDP/BL/wDZR8PHzG8D32qMh2mXW9XlmByOpRSq/pWmL4hzWvL4+VeRS4dyty5ppy9Wz074Z/s4+APg1eWurfC7wfo2g39rC6WuqaTpMCXcaSKVceeF8wqykggt0JFcbx+Lqw5ak20XTybL6ErwpJHXWui6hDaCCGZQoO5kWBVI46kbeKiWJk42uehChGP2V9xI+i6vNIjnUGfPeGbGT7jt+VZKTg7p/idLv2JbLwcZS0r2tzhny6GNuvqOamrO6uyYvni32LVv4XjZt5F3tXIUC3JxjtyORSUkZWbdmXrPw/4lUNst1lt1kUkTWYx9CaTqXg2ldIHQqWuZfje78ceHrnRYfCvwJXxJbX4caxe22vJZS2JLAKVidSHAGT94UoVKPs5OcrPp5mFaGJjNOnG8eup1kfwzsp4BMLu6ty5AIlG/GccZXgkc8e3Wub2knudsY+5fYS2+GV5FcSKskE8IwbZoUdJNvferZAIPofyqvapA4Nxuh0nghLKY2U29JCpIDSAA/wD1qHUhawcs0LF4SSXKDT4JgTgiYbh09+DWTqOOxaV9xknhB0fy4bWKEZwVWIYP0o9pKT0YJanhfjbwL8Vr7xF4j1rwt8LdU1jw9b3wjutT0m3817KXAyJFHRSDkGvls94d+v4j29Gf7xLWLOzBZ7DB1XQqLR/ecg3hi6uLMzRarcQDOTHcRqrL9c18VKnUpVHGpFXR9Xh68p01OL0Ma/8ADXiOAbotc4YcHaDS+sYdW5oGspVZL4jIudN8YKfl8QgAf7ArojXwKX8MzTxC+0JBpPi5+nirafeMUSrYL/n1+I1VxKekjR0/wz47MgMXjCPB6ZiFcdXE4C2tH8TT2uPtpNfcdn4X+FPxi1mYfY7l7hUj3t5dmS23+9j0968fE5jk1OPvKz9TKpUxsFzTn+B6V8Dv2dfif8T/AB7YaR4U8UaPNPGRcNHfXdtCi7DuKt5rYPTkd658PVjiq/ssPS9/dczstPN2R52OzGVCg515Plemib/I6Lx5d6kfHWtf8JBe2jXTai7Xj2EUUMBfofLWH5AuR2HPWv2bhzMJ5hlcatS3Ns0rW/A+dnThTaUL2tpe9/xOPufil8MrPxsnw4l8YWkniQ2ZuotHjhlLeUASWZwNq8epzXuN1lH2ij7t7XOGWJw0cQqLl776BfeNYhCYIZAiyZwscucc9+Mgf41u2+W66nSuVoyI9dupm8mxsotpBZpPKJXoSOaTaSuc9ZJPcrS+KYZmY3N3NCyKUxGdqkj8OnvWidlcyjPXQiXxDbSBTHMzHG5jlmDfyzQ3HdGt2lqxZdSkcsTGwbnCsflxjnBNK6J3IReRM4CkA4JHTco6468iqTSdzNq7LNqJrtiLaCZ2zhfLXI/Wpck9h8yjoXk0PXLj5YdHlUg5JckDPbgdO1LmsL4h8/hb4jT2kkmmafYQyyQFLe9N0UaFu0gwCCRwcEEHuKUoue5zTjKZp2yfEKG009dX1Dw/c31jZ/ZjqM9rvadcEB3jPyBsHtxWX1Z05Pl2HCCtZlOLw7rUlzKx8R28judzJDaqFU49AOB7VsoNrc25YtWNXwH4OS98ZWS+JdM1HUdHtbyO58QQWNi0kpsY3VrhlVME4j3dO9RVqclN36diU3ZqO/QwvFXhjQ7XxPc6jIt3PZXd1JLp9q1zPBbxQM58tRDuGMLgfPluOSaVKKdLVv57kVaMou8txsPh7RZLhrjTvCtpAxBDGKJcnHvXRCmrWQLV7EjW9xt8sREbcjoFBGOtUtHexteVtyo9rcHfMbVXPUq2euP1rVTsiJXa0HLYXDMbg6WmckAqw/pWfNzTuZTUnTZ638PPiTc6ZfxwQl4ZYxmZZ24Y46qeOfzr9RnWlKR+eTSVRqJ6h4T8Uy6jdx3uoeUWllHljf8AdGeTj8azqS7GvOkj1K01q2vLApYDakLcnacbvf1rmnGUlcmNSK0FsNUFvPiYlrh2yIgM592NRGMVudc0pQJfE/jK8gQzEqcqdrKflB6cVFWXUmEXeyOV+BYtL/4hXnxS1p43GjxNb6TJJJuO98eYwHIBAGB35NcuHvKq9BVFra5oeNfjn47+IHjnTvBXgnRTqz6fObmS1t18uGMgZWS5k+6AGC5GMsN3UkmuipO8rR3Qo04RbjDS53/hbR/Anw2tdW8Vm1gOsXlhHEkix9HLB5MFe2WkA/2QorppctKm21qN05KyTM/UtX0Txv4q0q38L6ZaSzW1wk+pxSRyFIbXy/3hmZwBu3Z2lckdc8DGE1TrK63TWnl1/rzM3KoouLZhfH34w3FrPqeleDLmK30+bT1
gSOVNpVQoVZODgPlTgjoCa55zfM1HsYxhJr3mfBfgnVviK37Xuv8AxI8b+Ik1C607w40fh9bxyG82Q/vWZjyW2qo9amhTTjK79466PNFNI9S8FeO9RvPN17x1qETSyXXk2FpahsKAGYsWPTp1rane2pU1yy9Sz4Z8I2/7S3xKfSfEYRvDvhKM32qRP/qry8b5oYDnqFILkH0HrROtyzsjWMOWPM0c58Uf2cr3wTrlz8QvhL49utC1CVjmztZD5N3kEhXiOVZeBngHHQ1EZWu2y5Soxhd7ni/jf9tnxZ4EuZoPjNpK6VcmKOFtZ0+Jmt513fMGTqhbIHce9cvPUV2tzOlXjduW3Q9b/ZI+Omn2/guXN0kkeog/a4kJJinZvlYjqCR3x3qYOrUjfYdSqqj90rftJ62NQ0+4tTlpGAWLavV3O0AZ68/zqGp81kVzqMG5I86/Zm0e20PVvH+jXsqTa2JLKCF2AZkthESY1B6fPvOK0VGcXdnLRlGpUbtsJ448MiC7EtyP3PziRDD8zPweM9uv6UndSudzq+7Y8we98M6HqvibWWZfs1pLbTpE6fPIrhlC49yBk+mah1bzaM6dSSkedeJheeMLmXVNVaK4upTmGKMfLbqOiD0681tBJPc0dWpJuPQZpgfwfLNNeNDHaRxEzpcNtRSO9XOnJ+6inJUYO+hw+q+J4PixrFzY6OSmkW7lmBZv9Mbj5R/sDj61zunKD11ZxUK31mpZaJfidhoulWMOiTWnlpFNbL5tq2eBgfMp9sD9K6VFLRHbWaUPQr6TqWmeJImk0zUUkkc5Ko/Q+1JSS0Iw84ybSMK50238Pa619BDHHHcvtvIgmMt2es6nccacKUuZi/EW2a80eHU7fC3NgweCWMnkDqPyrWjT55WN6yVSCcehl3Hie51vRY9TtLhTIqD5SevqDVuioPVmUqmmpympaobllljHltuPykjKnuPoa6aSgOg5X2NPwz8Ovid8QbK71LwB8O9a1uHTlDX8+laZLPFbAnGZGUEIPc4rpnicJhmnWqKL6Xdr/wCZtUqKNl3Nzwb+x14ZW4k8RfEi5kvLmb5jplr8kcZ7bm6k185mHE+Jr1HToK0e5H9kU4TdSpu+h6Npvhm18K2i2PgrTbfTIVTaUtogpPsTjJ/GvJdWWI/iSbOmGEpRXuxseMfFL9jvWtd1G88SfDnxE9lc3rFruwkciOUnk4YdM+hr6PAcSfU4KnNbdUeViMj5G6lCVmzxnxJ8Gfib4EuF07xD4Fv4yZNqTW0RlWQ5wACvrX0mEzXB5hrGe254mIwuKwcL1IO3dan6L/8ABP8A/Zx/4V7+zN4jtvFPg0P4n8b2yLIZYB9osrIMNsKqRkM/JI9x6V+M+JHF0cwzyGDwUueFLa2nvd9H/mfJZ5jKuIrU6UJbaux6J8SvhL8SW8HWWt+JPCOoaBoNs8NpoVrrUItpLjawU7ImwzAdeBg8HPNfJU6FfDwlUxF+Z6/eeLRoJxlVs1vumvLr+fXdaGN8cPihf/DmKGy06T7JcWdvE9sHbDXcnGEQDqcmtckpvMMY4w05evcmlg4YibTkk7X6/wCR5B8YP2mfjXo2nX3h258OX1xPqtrtfZqUZgtd3/PZmI5/2RX2GByPDVcVL28tt7p3v/Xc9DBZZRnW97X+vmfKvjX4pftT+JbeXwfd+NbyPSbRdqxaXI4gb/ZGwAGvu8syjhmlW5+Rc3d7/ifS0ctw1KS5Eubv1OYi/Zx+M+sWn9rXHw48S3b3A3wXMWlTOZPfOOa+2gsooQ5J14xbWlj3lk1fFU3qzd+H37M/7TivLcwfs++MZpGG1Jv7CmA2/UgV7OX8Q5ThIy9rWjzdHc9PKHicBGXPTk5bXsdVZ/sRftheItXWwsf2fPECzTqWjjvEjhLKCMkb3HAJGfqPWli+K8np0XJVk1s2rvf+vwG8TWrYlUIxanJNqL0bSsm0uybSb6XXdHuP7FX7G37UPwU+Nl1rvxX+Gg0fRNR8L6lo9/Nc6lC5ja4gKxhkjdmPzhexxX5P4l4vBcTcOwpYGSlWpVYTS2fuy138j0MswmYYfHRnKm1brfzGW/8AwS01zXvFmpa34h+OMGkrqFzJNDp2neGJZ235+ZAzui5zk9cV3ZVxjHAZbToSb5orVWZtj8ozWtmNStTmuWTudz8MP+CSfwx8WPNZ6p8c/E9xqVmoN7oUOiQWV3ACc7tsjPlSOjLkVvivEPH0IKeFhzXPJxWX8Sxm1Rs7dz0jSP8AgjZ8At1vqOp6d451UqdoSfxHBCOvX93HnOBXj4vxA4sxUOZRgvvMMTkPHeNUWqtOC03u2SaJ/wAEWf2arDUL3VPEXjTxjOs0xa101bmONrWI9IzIUzJjn5sAmoocfZ9Cn7/Lzdz6PAcO1KUU8VU559baI7v4bf8ABN39lb4QeIYPFHg/wXq41S3ikjjvrrxDOzFHUo4wpUYZSQRjvXlZnxBjc9wzw2PUZwunZrqndP7z2/7JwlrON15m7ov7Dn7LmjXRu9M/Z38KeaT80lxp4uDu7kmTNVV4hzWVPkVRpLTTQ6FhMPHXkR2+ifCDwX4Z2w+HPhv4fsgCSDY6Jbpj8QgNck8yx9aNp1G/mw+r0G78prNo8iKy3LTQRbSNyoQqf98Akjp2riu76mjUehTn8DNdzmdbhroBTsaOZirD3DYI69CKvn6DUEtbDE+H8KMc2JxIOCGyo59TUyaeoOTZEPhzAHluxHIJppB5kglYlsH6jFJSS3JVO7A+BL0gFo5WjVcKwkPfPHQ8f57UnOTVjZR5SGb4fSSuo07VLxYwFwrW6uvXJLMq5I7Zq6c7L3gm1JJomtPCV8+3ZAkka4LhUOM5BGM9DxmnKSlqY620L2l+GL9jLLDov2nyAqh0hP3WIz2yvIzxmuapXdNWHCmnLUv2ngqyvrqS18QaPNaRTYFvdwWu9o1XqSM8jrWUq0mrm0Y8pLa+D59C1BriG80qa0kiQQwjRQGLdC5Z2O4HJ4A4rKHNJttm/PDlulqJH8PdNjuTdFAGaMgCO5cIx91BwO3UV0ym+SyZzevU6fQvAPwu1PTLuyufiD/YGuWUCyj+0rO4e2u42D4EUqK4MmVAIIHWuKMsVKpK80kuncTqyo1Yr2LlF9U1p8mUNQ8H+JND8IS+JtO8Max4gRIpvs9lpFsHubyRFyERHKYLZGC20c1nRqYipWjCUXFPr0HjalPD0W0m/wA/8i7o+hXsmh6fqWr+FtQ0Se8t1nbS9btvLntSwyYpApZQynI4JFejJyi3F6mFBqpRUlf0ZqL4bW4Bu4poArsC3lhct7Y54pOXM7mkryb0A+GIZ5MW1tE69XaKIncB7gcfjWdWXK9GOKi4kE2gRxvI0ejuV2ZUyNu/Dp/QCphU10G2tjPu7OK1g+03luDD56oHtbZn3M2dqgICcnHT61nicVSw8F7Vq7dl89hyjJwc1tFXZwXwZ+MngP8AaA8Dt4+8EPNFBDq89jPaXymOWOSNtpDIeRxzg+taVfaUKzpTVmrP5P0OTL8VSxsOeHRnzr8RoPilaf
EbxP4k+GHxd1Xw/bDUmg1KLR45WFzGRgqwBCgdOT6VnWqQda7WrW97HGqdac5zir9Ds/2WP2d2+Kmiaxba9eaVql4Y3MVx4g8bQ2PlYGQ+wckexzya+OzfDSljL0qijpdqy1+bPey6tUo4RcybV7aXZwXjHwf4d8J30mhtb2jyxM0cjxa3LKMqSODtwV44NeN9QxlW1SNWNn09096jiaUFy1Iv53Odgj8JTwGYxpsXhtt7I3I/CuadDHQdr3+SO6hi8BUTvbTzZJC3w1ijBvJpFDL/AM/DY/CocM0vaCX3ImpXyqGrkXNO134L2U6tefaJAq5CtfOoyPoKmeFz+pH3Ul8l/mQsbk0mk7/ez1C4+IH7MGkfDfwbf+HPiB4hbxVql3eTeJIbK7uBDp1puCxRFsYkZsbsDoCK5MVkeaQo+0jKE207wcErNPR3v11v2PPo411sbONeNqK+F3u330PPdL0wad4gujaeLL2W1urxpbSW50+48yRSfX5e3HFb1OevRgp0kpJWdmrDowVBySm3Fu6vudTqOsavo149xpHhu6miuI1EksrEAOB0AfkEjmvteClUp4apSfR7Hl5tU5akXFdDA1jXPEV9Itw2ixwyldjzKYw5XP3SwGcV9xGg7XaPFb53zNakcUetsQryQx/KfvuW5P05q3GSL5kt2Rnw9NPK0114mY72G5YkYg47cnFJQXVEz9nLUfD4c0SBxPLqkrMcg/vEj/xq9loK6juaNtH4atypVvMJGSrXRbp9KjkbM5VOZlqGfTo4w9tocTjcMuynIz25o5L6hC7JJdcu9p8rR7dCFwpRM9fcVtCiupbdi/aav4mYPtnULkCFVtypOBzu9PStHCCWhzTWu5oW1x4iaZ3klBZUYHzEJ5xgHGR0NYSRV2SpZ3cpDzxJkrlwgwpbHJAJ4+lODaKTdhw0eVMbbXcCD94jH5jmrbstCXqWItHVlZJYuucAyEY7YBFRzaFwauRXXhBdUUCa0SVScDczkr9cnAojKxray0GQeD4rGXNkiB3UhlWM5I6EHOcjB/Wrk1IyqRjP3WJa6BHo1mILTSmgiV/ljUEj36jI5rNTsiEkkSMjJCXksFOW5If5T7fpT5rj6AlmZ51t4bKczuPkEdu8gOBk8jI4qJT11HBNiS6Xq0482OILwWcNY8k/XIrWHLzIpxbgztvE2n6Nd3KQoiRGFQ0khYkAjnr3+lfq9aykz80rScZM0vBlzqOqLLe2BKqSUjmljKcf3voK53Z6nOpc0j2Xwn4nXT/DkOjWuoAxxJmRygLu3dif5VhKaasjppws7i6R4jS9uHuRIkIX7+Xwx+v+FZxXLqdjcZaGL428R3muQtDBJMjMhSNgR8i56qv0rkrylU901jKK0RnWt14pFtp3wy8Cxf2bHOSr3GPMlAPLHGPmc8n+6O5rWlHktGJnOKWr6np2nQeGvhR4TbwrZkqJfmuoLaXdJcv3eaXqxPp0HQV2OMIRutDFQU7xlszg/iP8ZrzTIbjUGmt7e2gty0ru2fIUD2HU+g5rlc30N6s4wVkdR4C1q68I/B+3utRili1LXoxfatLM+1grcxRnngBcceprVt04+ZjBvmbseA/H/wCL76QBPJcPLJIwjtLdSMyyscKMeueg9K5JuV7vcKjSMT4gfCXQ/DHgXTb/AMaTyDU5UNzqc0aneWcZ2ZHOBwMVc04pITnUgfMPxZ8aftEL4xs7T4S+JFSXVbmSeS3vrFJY47aMfMyqABGFLABRjrXPzODa7gpy5nKW7PUf2EvjF4h8HaV4g+G/xO8SS3ustqDajLeTxBPtcLAKflz1TGBjoD71VKmrNyM4Yiam4y1R6/q/xFk1m1u5DeJIjzbrPYQcR7QvT65496XxN6nZGN43Z4H+1D4X0vxdpV9p19YwThYgjNgfe3Kf0pOLV2Z1ouUeVl34y/CG88CaLbeJPAF3Lpeq2umRTSAjC/6tThx0ZSMnJ6bqlVabSb0Ip4eST5mcL8DvH/xZ/aS1+fxtrNrBZ6X4ZZ4rWBZCf7TvkADSk/3EPQdz+FNXjK8QhKeIlrokXfCeq6n8Kv2hLafXdXlkm1zT2ikd22hLqJ2ZRuHUkMw59K65tShe2p0qKoyvtctfH74022jaddapr1zkByQ0bHdK+cBVGTuY5xXDKFSozWc/ZQ52eAWPhD4lanq0vxG8TeIJdOW+iCR6OgBRIQcqJBjl8HPtVxoqEbW1MI4epOr7WT+RB4u8QWHhBTrVrY3DBVY/ZIVLs4UfMf8APrURpOdRKJ23hTXNYyfhV8M/F/7Rnw81f9ozxZZXcXgHw7rkdg2mxkqbi7cFxHI3O3Kq3BrjzrNnlmPpZbRX72or3eyR8/iK1XE1o0qafK2/6/rY0dN0PT72Q6lpFskEIO2O3QghVHTp9K2SqprmevU9qnTjQglFFP4ha5DoXh17eGdo5tVmW2jIXoCcMw+i5rojJcyUtjLEKbikupjXmkzaeYr7QpxG9sEWIrwSuO/r/wDXqfcvua0qU6TuaGs6k2ueH/O8ryr+3BLof4vf6VtBPcqvCPIuUoaNrw1OxFvKQ0cqlfn6j1Fbp8quhU6/u2RyLuPC+r3GizEfZ52MkDg8Z9KtpT6HHzzVTUytSV7w77KMtdE+WiAffY9BWlL2cVzT0SO9OcoaI+3f2SfjT8Q/hv8AD/TfAFtfLoOo2FmY5Z9EXy1mDHJFwAB5pOcZbNfmWe4ShjsTOs3d3012KjP2ibe6Wh1HirwhF45uLnxDomkQWd+qh57eE4S967nRSMK3fA4PbFeVgsTVpv2dV3WyZvhcbUnifZ1PhsrPz/qxw8uh2jqXEZBJwy7eVI9a92nUtoj2ZQSWg2HQYxJ5Lbpc5ztx/ShS5J+/dr+vI55Qluej/s6Wvw28F+Jl8f8Ajm9sXurRimmafdLvWFiObhlxglR90Hvz2r5jPcVmM0qGETs92fG8T4vGVaX1bDxbT3Z7x8Fv2i/2fvAfx/0K88I6ve+JbiC9a7ubC40j9zIxOSzyH5QFzwK+cweGxGW5jDGOOkej6nyNPLKuHiq9SNmvM+VP+Cmvxg+M3xo/4KNeGtS17xDJqdqdR86y09XK21vaZGFjUHA24B/GvssuxSzbIcdicUveu0vL0OWUo1cJVqVJO/RG14o/Z6+J/wC0r8fbHSPBnhuG8k0W3EdvNfybLayU/ekZsYLAZPtXk8P4qhluE5VpffueZg6k1gJRjH3v60uc1+2P+z5b+FtZs/hF8CvA2sa/pdm4k8X67aIbgS3pHKeZwBznC+nNfSZXxJl9LFVJVqqSn8MXq/8Ag+tj6XInhYYiPt5q7Wxu/s7/ALCn7T/xv8X6XdW37PE+gWUcRTSINcihsopYYgN02Cct1BLnuwz1Fe1RzDD4qq1R97ZX6LsvXR+p9o82yLCZhClOUfaSTcY6XajZNpbtK6Tfmr7o9P8AGHw41H4J2l9d/FD4kaBBHonyz21hrRlMT9TtC8YxxxxnjrXh4/OcDTrxpSlzTeyWrO+jxpkvO4O6t5Fvwbp/hn4h+FrLxV4a119U0
/UAJLeWOZmVl+ueKhVlJuKVmujWtz6zD4jD4qkqlH4Wa6eAbZtQaGzk2DP+qlm4z67jXRGakrM6YyjGWm4H4Zanrem39hL4Wmu7m4OdP1ZtZe2WxIzhyiKfNGSDg+nWnQqxjJ6nR7GU5Kd0rfiXNA+AHj3UNRszZeLvC9osdtbw3Ok3llPPaTTCIJNcLL5vmxb3BfaGwpbgADFVCdOEm5Xlr1t92ltv67mGLourFRi7PujpLf4KeP7HU4bnV/EPhmS606fGl6jYXlxHPbRHIeMSAN5ikHbhsjFVUxDirwJoUpw5rt9jqNT+G2kHUHvdBuXMYAeGKW4LvESOR5gRNwB77R9K5PaN30Ol3SsX9M0vUdPiEU+pSzDgPDcZkXjGOo/lSctCYrU3oF0fUE8uRjbOOu9S8bfQ4yoqYycZXZq5K1x8Ph4Ah0izGRkmEAofqetaOto9TO/OW4dA0mQFXjZSv/PIZJ745/wqOdj5WtBs2g2EoKJAwZcjJyAfrV8yKVkiGXwwm4TCxYkdTjBJ/wAKUn2J5rif2HaCMM1gFcZbEi47dT6UX0JUW2SDQoZE/wCPJcnOCTnA7/hTT1NrcoSeE3kP+j6e+A37wRIcd/yptu5Ld0R/8I5bSxrNNp00TRn54570RlvfYBzjjv1olLQS1A+FbedCfsKLsOHEcqtk89cnrWd1Fk8liO8+HemaneW98+q3UEsAJi+yaxNbJIM/8tEjYK/0YHFTUipFK1yW68C3DurzLFdFQd7NN83PbknPr2/SjljGOhFSSeiC08JQWQwlhHb7ozsWXhl59en86hNPY1px5Y6kyeHopVDOEnkL7kIYNjg8nApz5bak3TZY/sa4t1W3ZZQACWBxgHPqOaUJPmuaxWhY0/T555UZYpWXeTu2FizDpwDRV13M5y6M05vCWvtajVpYriK2aPc0nzFY1Jx83HAJ6VlCpyuyI9rSvy31K154efRXFtqMd2kr4YCdSjAEbhyBnbj1HNKT5tLiVWL+F3RFe2iatZC2RL+2kgkWS3uLDVZIJAw5z8jDzF/2WBHqKHGcot3JUeeW5Ve48UXnnm4l85pZSSzR7SfQ1VOHKrI6rRVjMsofjZZ602p23xAkhhi1SC8stPs7cRLC0Ksq54O9sM2SeOelZVcBRq14Vajd4u9lp/TCpCklJJbq2p5xr37Pfxjt/FZ8WeFdXsLK3e5mvdTtbPTMvczFeHAQqA3GDnrnrXPhsG8LUk4Tdn3d2edOL57pW9D5d8XaT8SPB/gu/fxt8RZfDmoanrtw0/ho3Drc3CszeXOy7SgXG3jeeSa+hy/BYDEYtuqum7/Q4IVMZh8M4xnJJu7V9HbZ9tDn/hr8PvE2oW1ymm/GHV9OlkhZpJZL2ONGx23EHmvUrZFkeLqXqU07Cw2Pxqi405tW13sF34R+KMQZpfirq924jKlDqaHI9Puk9K4a3B/DkpaUEjR5rmdVW5m0IbWK10+SHUbTX5Lh23JcReJvLQgdQUER4/HNXHhHJrXUEvkCzjFQVtbnP3aeJrQLI9veSx8ITJr0uPocY5qP9UMqb3t8kZzzbHSXNYXT9X1y1vmiv/Dc01vg7d2tXRHGcDh8UpcHZVOTSm0uj5UVSz/EUVZxv82ewfs7+OND8cfEHS/Avxe8RDwfoENncG11ldTuyPNABjR3Zm2KSOwrzqHh1kHt5VK7bi99EjnzHinNakIxp6dDr/hJonxL/aY+Oj+APCS3d9a/PDp/iLX3lgsoo42kJla6n+TbtC4wSSTgDnFfM5nwlChiVhsq1u9L6WXzPfwXEtOOXvEY9uTSS7v5HI/Ez4vRaZ8P/FPwlN1N/wAJXp3j2GOK1gtTNbzQW8VzDNIlwg2MpZ0K4PzDkVrgsHmWR5o4zs4OOtn9roVPE0s0pQxEbrfRpo890rSPitrcgl+zTRg9CUx/+qvajj61So09uhg4NrRHRaV8L/iBdkNfXsq8Z+VuPzxxXQqzdNX3MvYSeqRtw/BXW5IkS5knct1VZcge/UVmq0myoUmknJGhB8C57cJO6KVzgs7579+4rpjiIJalyhGWxp2Pw4s1AKuM4IJGNrYHTNNYiD2Zg6LuXovBtsuMRNkckKpIPHTJHIp+1SKjCSLMfhi1tojiGRHDbWVkYDHr0PIGabru+hPLdlj+y7OR/JSeEScbIzIA5HXPvVe0JcGnqiYadcF9hjB4y24ckiq5ieRix2REZtwq8Zwduc1PNYVnsPNjCowqMzEHeDJggZ5H5UNtkyuiR9NhlAe1tpRjGFlcbhn8uKSbSCFyGTTFR1IsVxySVnbBNPme5tzLlLdo9vIlwtq0imzkiim8yFowXkUsoQsB5uAOSm4LkA4JFZe39/lZmqsXLl6iExAFZLaYy5z5jxnHP0x2q3ZrQTvzCSfZVQFbVMMc/NkgDPTrx/8AXpIfKQTskcZWKziAdcEqzE4PGTjpVdSoqyK+pwves91d3U0TJFtVLedwhIHOfm69+K3pKKkkaN3RcivbC71y3u7398IeIoi3yq2OWbnn9a/VJNTlc/L6ztN+p6BZT3eu6a9jYwhYrZN1w5UKo9uamVNyJhCzKfgvXzDq8y61rU1vaxg+XDCwUlvVua4ZLkluaRq8nQ6O01CyWwY6RPLcxl98kqoRxn35P1NJyurXNXLmVzL1rxrpEU4vZBEogJWNi2SPUkj/ACK56koRlqax0SIPht8c4Lc6h41sUDXc0RjtpGGVjtwcEpz1Y962pVVCPNuVJqasQr8RfEfil21HUbn7JAScAnBI9/U+1TzubbFNqKSRn+CPL+PnxDTw5aQv/wAIj4ZuFn8QXoU4vrhTlLYHvg8t7YFaUqac/IiyXvSOm+PPx+0SzF1brqAhWFfnuJsMoOMLHEgPzN0H0NKVSDm1fRBGLndo+ePgjMfjF8Yj8RPFKrH4c8JnzbaO4nH7+6JIG4f3gASAfWsFGXtubo1f+v8Ag+uxVNc7aZ2fx9+J0fiEybbgSpKSkcrN8qkkA4A6kHC81VV63ewqtotRR59+zJ4ct/FN5rfxE1nXoLWC51T+y9NmuoH2R2UI/ftkZwTIevOdvA9FDmkrp7HJHmleVhPE974N8OfEOPxrDo4uF0268uRQcefA+BKzcfKPm+nFTUldK2htRpSn0NH4n6DpWj6zd+IvhN4hgZYJ1gntJn+XeY1l2cn5TtdeR1zWMXGLdnc75v2dP3jwz4k/tEaM6QaZ4kUWskVysuprL0VEYZcH+IE+nNWqt21Y5YVYu7tsd1q3xM8e/tQ+EpNatLG60nQ7jTYra1guHK3N3DGTghScRqcn3IPPapjRalzSXy/rQ6Pb+2VkrFP9nTVbH4TeILn4V6xHHbLdSSzaJO0eFZjgvH/vZGR61dacYdBtxpxsc3+08t1eaH51nctFdW8sc1pOAVeKRWJySeQDkfnWVOcpta6HLNuR5b8JP+Ej+OPjCT4k+NpIhY6dfNbaHp2/908q/wCsnbtnOcUVPerckTXC+0rycqm3RHX+JNXmuryWEMoeQoqtsHbIwB6Vo5vl1O6bsrdTG8IN
pMWoX3iLVrZpfIkFrFbvGDuwPnPNKDVzCnJ3budJ4K+Jml+DNP8AEHwd0jVhY+BfH99ZnW4guUs7uFjsugf4SAxVsdQfbFeDnuVLEzhmCV61FPl812JkvbTj9mzH/G79l74sfs/eKYox4XvtU0LVlafRdV02Bpo72Ic7025yMY/OpynO8Jj6fvNRmt0+jOqap06vLe7eyPB/F3g34v8AjAxeIL34U+JYrKCULYldEn2EdS+7b9Pzr2ZY3LaMGpVY8ze10cKqqVT3tPI1YVEmh2l/dSAMpEMiMMEMMjv3zgVeHinq9T0KzvDmQ3VY1ZT5abZFU4KDJwfWuj3pOyRxzUpOyRwz3N5omrvBJGY4Z23IWB6/XtScqdN3bHCk4K7HeKYF1qwxvzPCdysBzn3pOtJsqTgle2p1v7Mek+FNe8aSa54ptvtUel6c8y2azBXM+QisOOxOefSvA4ixOJjg1TpP4nr6HBjcbLD0eaKv6HqOrW/xM+Husp4ztVa104tkX6SpIHUnDArnJIH8OM189LE4VQVOsn936nHh8TXjWVTWEX18j2fV/jV+zn4I07VIpPjHfeIVstDhu9LFho0tuX1FiN1u6OAQi8neODivEli69aMaNODUW21qrX+8vMM8yjL6tTlm6iUbppdTwPxp+0p4jkuZdTs9E0S0lvPmjM94HYk9CY1PGfTFevhXXqR5Wnp1UXb73octHjPHYiEYqEYp9b3fzRH8MfiP8UfHWqS2/iPxFDbRW0W57C1tDA5zgjJbn8q668VOF4bd7p/kexSzLF17wlPb5HaNPfSX8Gn2sI+0XbiOD58l2PGTnrXm4nEU6FFyeluphVxEaUHUmfTfwjtPCvwR0M6h4nsLeb7LD9o1GW4XHnYGSueuK+TlUqyqc9W7b2T63PjcTi62LrKUtl0PM/BNt8L/ANoTxB8Qv2s5HFvd6e5g8I6WX3Q+WCA3JGRzzn0rrzCdfC0lgY+5F+9K3meXj8TCo/ZU7RT/AAKfwR/a3/aj/aGnl+Bnwd+Edl4cis73yPEniO1uQY0QHk7l5lOOgPcirzrLsFluX05VsS3dXjBKzfqThaU8TJUqf4H1h8XPjn+zf/wS0+DsN9qNxB4o8WataebY6G53gTkZMku4csSep4Havncvy/F4/EQWGnCo5r3t7U/J3S970bR2VatLBv2claS28z4a8W/8FS/2jvjf4ofxB4o+KV7Dby2cv/Ek0oHybK3I5HHfHftX188hr4WnaLd/h5m7XuraI86pVxNSak5a2fyOU+DWk+Mf28PiVHbRG8i+G3h6UNrUxYr/AGhJ18rceWJ789678LkkeH8PzSSeIns/5V3/AMj67hfJfr9ROavCO/mfbOleG/DnhHSItC8HaOmk6fBGsdrYwYVQOgxjpThHkTe7e77n61Tpwo01CmrLsOtJ4Reva3whFwDtgt5CS59xjrRKpG6SOynVjTdnq2dd4H8LeJL5J/Emm+EtRubKFGF1c3Vufs0OByctwKmrOlGPxWbLnj6NOnaT2Oz8DwaTrNutppd+s8d2oe3lEIAbsQrAE4B9DWf1mndpy+EeHzHC4iPuvRnSN4LsVWMGCKcLIV80OG2kdQT61p7Xnaa1OqhUjVhzQd0Ph8MWby7IYSABkRsw5x+H8qr2ivZGko63HXHhSKbYxtWyG/do/IXPoaNHuQ32K1z4QnJ8pI5Qy5+Vz3p84a7EMGm6tpMm61EqEN0XOP8A69F4sm3U1INdmeIR3lvbsxbcZGwpIzyMj1qXFrYd5LZlxJ45JBFbW0sUjgBRuEinPcY5xVa2stzSKbiXktJYgyNYqGziTc5XPPPFa6X0MGmnuOXQLRQSti6hjtCh8k/Wm3boWpW6jv8AhHLMAP8AZiGxySSB71FhuorEj6GCuyANgcsrPjNaW0CMvIlTR1Y4VEOV4Jwdw980JtMPQBoNrGR5WmQgNguVjGSahq7uO7aJV0BZCfLs0znlcA/jVPVAm3oRvoEsYzJaq/Ygp3qWlYm6uH/CPsgybePBByWjxn2PFRCGpcpNFdvDUFxC2zTosdAsJG7HofT61NSJNN6jhpsOigR3V21vE0Jcn5pd4XJICqpOcduprPmlA25mloF78OfCfi/yNWa1iMkhD29xG81rInXBK5DKfwyKlS59TmnUbkrorx/Bq60/T57bTPGviW3triMx3EEevysrjOcBXJ4pyip6GfsoOV7fgSTeDPFk1wkJ8dapeMiKgS/VJsKowEJK5wB71P1ead0zVRjBWjEY/g/xcl1Ffbba4VGxO1taqrSJjkEHqe/BHf1qY0qyfc0puFzQuLfTIo4GZN0zRb5kNuVWBySNmT1OOcjjmuhR25i7yk3dWHRw2kkW1fnZhyRH+Oc4quaysDXcqyaPpctw1tbWTvO5Pzg4UgAk9Ezn8aiV0tUQ5Qa8/UxfEvw98N+LLZ7HxH4TstRjxt8u8tQ4x1PXJrnlKT2uVfnVmec6n+wr+zhcie7t/h9PpzysN0ml3M0IJ68bD+ldFHF4qikoSY3hcNUjdwRg3v7CPwuu49ln4r8WW8WCyxNe+av5So1d8s3xvLo9TFYDCbctvmY2p/8ABOjwXcoXtvHWrKmMEPpdlnnqP9QKqnnWNUfesZPLMG9k0ZN3/wAEzfC92qxzfE/V9kfKAaVafN/5B9zWX9rY7V3RMsqwSW7K8P8AwS6+FyOJJ/iP4p27ThLWWO3Ujv8A6uMUoZtj+Xc5amTYWcr6mpYf8Evv2dohi8m8S3uOsd7rtxtJ+isBUVc1zSpHldSy9Ap5JgYSvy3Oktv+CfXwOgt1tYfDqzwx4EcF/eTSKMdMB2YcZ9K4lLEc15Tuz26FHCUKfLGCS9DpNF/ZU8CaTCIdJ07ToVjAUxx7iqZ/2VwBVSpRcdTSeJjJWsreRqR/s/eF7CeOCS0gk3H97JGWP0xk4IrFU/e1OV1G37q0Ih8CDFOBJc6S0BPy+XaOjBeeuXxXROELaCinfUhvvgzpMgMdnaWxlGQzsWGBxj+KnGKsUtEZt58EfEttMmoaJbWEtqTiQSztnn0656DFc9X2kfhRmlFy1ZdufhakFvZ3N62mO08Je6htjLFJaODjafMQpJkcgofrinTlUsr2JgrzkpRfkyCP4d6NFAxmurmOckjy2gXGO/JwDgVor33NVGNth118OPDkEhitNbM74XdHcXaQ7SckjB6/nVKTUjju/aWsZsvhrSI5vK+zQ9yC8u9vw9O1buUrG71Ww0eE4pmINnGwx8gVsluvYjPvT55Iz5ebQlsfhlq2oz28ejeGGne9uVht1R4l3uxAAZnICn/eIqJYiFN2ZE4KFJzb0RjjQILhmj/suS3dZXjkiuCokUoSrZ2FgeR1BII71pGXNsYcqkrohk8PWQ3GSwcH+EAkDj69q0TsioxaIX061jAWWxwOuGY4B/HpRuJq+hHcAJGsckSYTICtyB645o5ddSuTQqA2kaF5LWLoSGGOR+Ap2I5dRHWwkUv5O0A8Iq4J9smhpIbS6ELW+jSXESJt8xiFj+YAgk9Onek3bUaTtZDL63t
Y5HWWyjiZFIkDgK2fTHTua1pTbqL1CXNtY860LxJDqusLpFmU88/vWiL8/U1+tygoM/LYqbfvanpWman/AGPaR6d9vBg2lpFDD5m9Tn+VTOorWNEnfQtT+Hn8T6RJdW8qW7bPljeQgynPAYY6VwVINq6GnfQlsdMuvB2hf2l4tEbTvnZaxOyxxJjAI+tc8vhs9zWXvQSijh/GC/25HJZWmnMyyxHcqZXapB3Z5yPr7VzVdUPlbRwvhPxPJ4Nkkt5obfykTZFasWKxKOF3YxvbGDgcc81TlypIS5k7nQeBX+If7SPiGTwf4JlNnp9gwXX9e24isUPJjUngyHHTtWuGjUqyeuhorP32eqeOviT4G/Z4+HyfDf4ZgqRAYwRId0zZJaQ88sxOSep4qq9aMfcW5zSh7XEOor9Fa+ml+n5vr8j5K+InjzXPGWsW+lzXLJcXk6wxJHMd0kjnAxzkHnnHQVxRUp3j3Oh1FSsj0L4j3l18JvBNp8KPhxZww3sMQku7iaIOssxUFnIByQMkc+ldUeem+RK36mNWpJO8Tx74kXfxR8ZWd/BY/Fw2aW9vHbx6XpmmpDI7lAztvOSRk5GMH8aXvyfLcxSdWScmeIfCPxd8TvC+lXvw/t/iFrcB0W7dhC052lX3MJMHqcn862hRcJOSdhUqdaN4p6Gp4v8AAXxl8a+G5NSv/izqklhLOUZUu1VnlwrsHxglcFDzwe3Q1hJ8tRnalKjRTbPafgn8VdT+Lfwj/wCER15YoNX8LzG0AtlJE8DKWVySSWIUYDMSflHPFKNG0bPcxWIVSajJ6vT+vuPOPi94Gs9P8RaDqV9bieBNXgS4SU8qGcDnPXqDQo+zlzG7g6Svc+xR4e0SPw9byWGmW9hF9i8uK3kwJ5kTO5wy8Fc8gejCrhWdXV7m8b6dzxv47f2XqNpMbGVklguVe0vB8rwspJB/2fX6VM4KSZnUhKSucV8QfihB41+D154i1ZY4dU02IwatGGziRVBDD0DAZrOdGdKSi2tQekLo5/8AZ+gW0+D2hag8LRQTWrTKXBC+ZIWO4nsMc5NVOMYTs0dOHVT2epznin4u+EtI1ySPS521a6hyFgtFLJ5n+2/QUTpztoxYmpaOjOV0rxN8QNfmXRdN0ZLFYZWea7uHypkbJY+/Yc0lanT13OehCrze9sbkfhyOxs5LW61GeeRgwnU4COCByBWHPKcrnW3Hpue5/AL/AIKC/Fv4EeApvhJrF4mtaOthPb+Hr+9gSSfRTNtDiNnBO07VGPavmsz4boYrFRrYWfI3ZyXRtHHHCU5YpVpfGk0n1SdrpPs7K/oj1z9iP/gqz4K/ZP8ABMng74kfC3xH4onvEnWN52tru0g3ncXjiEatETxxuPTBrxMx4azWderUw/spKcWveTum1a6d91v/AMA0rYTEayXvWOA/Zu8ffsJfGD9qTxRqP7TOmPovhbxNcNPYQzziJrRmPPoEbOTjPfrxUV6fEGT5XhoU3KfJpNxs218zmc8XCnGnO+r6FbUvg5+wh8Qf2/vD3wP+HHjbXk+F91dLb6x4gsbxGfzXGFVXJYKuc8+nSuvDcRZlhcnlisZzRd+q95R72RhKtWSum1bqHjj/AIJVQa/+1T4g/Zu+Ffxq0aCOwhuL+wuNf1WJnnsogzbkK8ElRxnv1xXHLj+lTwX1hU3Ujzct0mvQupj1TShOV2zK8KfsBfBW0An8R+MNb8QTxgCeCCRbSDcOo3DLEV3/AOsmPxUYukuW+p7dHLvaJSk3qd3pnwO+DHgOOWXwl4E0/RYjEfMu7iEkle4ad+tcVXOIyrclSb11S3/FKx2xwODp071Eku7POfjJr/wPv/CTeAb6407UL7UpyNLn09mK21yvzIdxAGciuXF4/GVbfV1pDWX+HZnz3EWeZSst+qU2pSbtddPmeHeAvC3xf/aU8Ua/4Y8JtAde0PTJJLhZWCtcQQrkjngt16dTXXWWU8P4ajWrp+yqP7m/0PhMmyzH5vjKlOjTvZXOh+AXj74J/AjxFYS6/wDBew8e+JY7ac6+viy6e3t7EspCGHZz5iN827nkDHqOjELGYqt7Wq7YXaMIN3mvOS1VxYSustqqcqSnLVWeyOx8BeN9a/aM1a9+IXxB+Lltf3VjYtG15fRxQvHDEMLESgAYgALzzxXzlVYfhWKoYbDOMZPRJt3b663PYwuOre15pa36En7L6aH8XfiTqHjvWfEFrbaDou620mWaUqksw+83GTx0rHP61TCqhg5+7Op70m7+6umye5xZtmcKuK+rw7HrvxwitPiJ8P7vTPDuszokVk8e/wC1fLcEd07/AJ1z+1cq1Ke/Jbftc8uVZy5VHRo82/Zp8fT/AAv+GY8FaxpaSed5sclvJbHv1z9cZzXVmWMUMxqVeXm5lZeXoebKlOpWlJq56B+wN4b8Ga38ZvEfxm0LTrbR/B/gS2kv9VltHIi1HU8ZjtyQcM2eT1rzOIauaUMspe0fNUfw83SP/BPbyXC81ZypxsoavzPh79r79oHxd+0p+0HrHizWrpo7eXUZFtoXkYJbxBuAAegr9I4XybD5LksWknOSu7dWz5/F4ipiq8q0u+hf/ZW+FPxH+OXxLutP8KyXGn+DtKtgvi/WrZhGRbk/NGjMOXboMc81rnOOy3LMJBYlKVabvTi+/d+SPUyrL3mNaMJ37vyR+kHwn8JeDPAPhCHwr8KfDsmhaFbx7rXTpphJLKOpklfAyT1NeK5SnWlUnJuUu7vby6H7blmFpYTDqlSVkjs4NSuLoxRXwQqw2pGLc5H4+lTJTT3TR6lNx6bml4d+Aem/HjV38Lpqs1k+nxm71HW7NgjWSLyFZu2fSvGz/MY5ZgVUpyTm+nXQ+dzjMHTnyQ3R2+ry+INT+CmpfC74b6/ql8qrNPqGoTTCC1giWLYi5481yQzbRkkkegrxctxsMTh6c5ytUu5Wb3Xz/p9NTzMPT9rhF7z9pJttNpLlSW347/I5f9mVfDfwi+DWn3njDxElmNB0+RYDqkxR57lsDLBjnbkk/hWWJq0JTqVp1rufb8jLDV8tw9CCm2kk7Wbd3brqVv2TPiJoU+rXXwn+GguNXtYb+4vtV8S6ldskd5eTyFvItgclyM9Bxg9a6/7ZnhFTjL3+ayUYrVLuysh4j+o4j6hSpymu+lte1306+ul3c9313U7bwfqT6N4nnh07UFiL/ZLuVfMx1yBnpivoYVoVG1s1vfofexzXC15+zvaS3XUXw9478Ja3JLp+m61DNOkPnMEdSygcnj0xRKquVPmOmhjMNWk4wabXmWfCvjXwF48tp7rwn4ls9Sit7o2t1JayqxSUdFOD15H51bkouzOnD1qGITdOSlbe3Q2ZtLQQZ+yEqR3GSD+NXBt7l6zRSm0e3OD/AGejkfxheffIockLljFamH4o0G3nsybDw/CNSjIW0v47hk2jnKyJyJAfbBHrWU4VLc0ZWGoSavfQpa/qGsXdnYafqV5Gk9i7i1ntlZd0br80LBmPyhsEHrnvWNClWo4nnlO6OeOH5Zt3uOsJ/ENpiK3vrhiCAy
kZ5/wr1faKS902jBTdkjRstc1/azNOmxQWlkYAKp7liegx/KnGV9AUY3sbXgnxFofjbQY/EPhzVrPULOSRkS8tJRJG5QkMAwODggjijnu2hOacbpm+bMMmEhHXDbc5U+lUmrXMnJj104j5RbsM/ex9e9LfYfM+gqaLDIpYQBSRx89DRfO0TPpc9vERFZCcg9DKAfzNErpaExabuxz2aKzbgRlflEuDgYHHFC0QTdxPsFpcAs1tHvwP3oXawP1qJKT2GtEK+gzKBgo8bcjY6huD1OMGh3QNqWhQuvD1ow81ki3NnO5NrenUc1KjzFLzG22iXtrys7/eG0KSf/105RjHVFpq5O0moWgUSW6sN/KBMfrR0HpYhlvZCJGa1hGc7x0PtnH8xRB2M+R3IXa2u3El18owfl83IP4Grlqim5IieCwRmdpHXggbTyPb2FZclhpyluVZLm/kgWysdTvfs8bGQwmQ7EOMZPpWnLUqJqKulq/LzIjTp81+pRuLhXZpQ7M54LM5BY47H/8AXXE2nsdNox0KjSvBmWIgYblTyM/Tv+NUr2uEW2itcNM6CRrdMs3JjYjOcZPFE5aD5rMiMd0ckXEmc/MW6H2qVa5XMpDHDqGIRpNow2JWXA46e1OVrCmlazIP7QglQSWieYpbgG5Zsjoe9OE9NDOMZN2GPqt1IpIhkACnByeOB703Zm3LYryXeoF8qWLEbQRwe3p0qeW7uJ3sMNxqD8i8kC8ZIIB9x71onoZODfUfDcy7QVu2x1JyeePzoTV9AUEtyVbuRyQZGyWwTyB/9endMCMzz7toDFjjAJIHvzRd9CJXHHUri2hZfKmAUD7uAWOemT0pStLUyepLMReKFuZGHykgFuB0I5H8qqysVzNlRNK02aX7RPczQk7trxTscggjgE4oajY0jPTVHL+IfhZqE+sPqsHxj1toTIsjWL2sJRSM8A7c9yOvesI0ZqbfMc0qd23Yvw+HZnjkIne5yuDNMMnJ+nSuq035lpvlsMg02ewS7eS3gaWS4VrS68x1EMIUAxmPo2Wyd2c4OKyeGrSrqaqadi+aKjYqT6XcrEqJewINx3+XCeSfUZwK2dNN6mMm7aGbNp1zGFEmpMRkDCLjv7DvWitFEXdypd6eVbJug5xht0h/XFNMq3MjPujYQKGuLhE/i37srjj16f8A16bmkYyjy7mZca54eiUn7arFjtXy2DF+ODxT5k1cuKctinJrekthl8+Rh1KR4z3Izjmo532M5KSZXuNZtZyETRZc7dw33RVj+A9OMUm5SJcZplVtSf7K1jJ4asbgyRkSJdgybx33A/55pOLfU0p3UipJPNaxyvYaFptqWYu3kWyjccdTx1rppRtNFybsz5+ufiKkVxHD4ciH2+6ZY4RFzLO5Iwi9etfrc047n5K3GjUsex+G/h78WdN0Ea3411uyF8QCumRREvbKRkB27t9Kx5E9WzN13J6noXwdC3F//a3iq+DRIoMVtCpAZwepJ6inzwStcpy5ranTfEHU4dfinvnjVYmVWOAAHI6KPyFctSlOo9EdtKyhfoeZ69ftFBPdxskUs0a+YxAUk+nuMcVj7Cb6Gl4vY8h8cW+p+M9TXwx4d1Q2UtwMNNBGu6NTjLD3/rWbwspPYJQlJbHq3h/xBpfwm+HEXw48O3EENtZhZrq0FzvmuJiSTPO2Ms7HJ/8A1VtOcqUNXr1+f+Zm4xjBQkeMfEbx5careTahrF6qxbiyBcAge5zx0rz9ZO9732GpwjHVnnvwW8SW/if4pT/EuSBZtK8MkpZMh+SW6fAz6YQc59TXpYbDyg+ZmkFzrmWptwfGKLUPiDqFzr80b77craSNNvIPIyevJ9D2repRlOXMc9R+8efeK9T1K21tPElhqUn2cNkbFI2tg4BH+P8ASuZ0505XsTHmpPmOXk0m7l8TQfETRptqTxNb6ujcAox4Y/Q/oTWFSTqND9pKXvHSeGtSk8RacZJrySCS3lka4WGL5ZHGQePfgZ9hVqHJG/UlVVN2Om/Z51238JfHlrS4jjEXiHSZIlhbjmP5lyMfewTzXNJudRJHXRUYassfHSa3udIupZmMZs7qOYFhkrh1JHv0HNaP2luU6Y8tW6Wp6brPxGu9Qi3tdlFS2TaS+BjaMj8a1hSm1ypGllBnBfEPxX9sae2YosNzCHYD+8FI/Pk/nUKLp3uRWrRirHlfwxh0n4kfFDxJpviEEeENC0qPUPGDxkjzFRtsdsG7PM7LGCOcEntXDjadeqouPUjA0/azk5bI0/F2t2WtaMljdqsNsuDb6XbsUgt06BNo+9gYHPpXZSjONPllc6K0lFW2OE1iCDRJ430mOJXaaNbWDywBuz97j0GTzWFeck9DmpxdSWhuNqEJEnmyhpGk3SNgZZjySaajOWr6nVVbS1IpdTgaUJcFWbBxtOMjtU+zcFexnBKTKV7MzROWlLZPK9+OlSpyhFpdToUdLWF8I+I0t5rpLiXPlyKYznJAx0x3rGakoNo0oVIxbitzqVvtFu4990sL7+gaIcf4VkoVVI7ORw1LWmW3hhIjCNMsJS+N37sDp0P1rOdKtLSSvfyM4Uot35Uz0H9mL9nTRPj78e9L8HaILTTJWVrrVtfuLt1FjYQgvM7PnIULnjoSa+e4kzajw1kNWtOnzX0jG28nt/w5xY2ng6VGUpQVz0r9pP8AaQ8Mz+IJvC//AAT++E0mr6LpQNpceO/FUhMdxIgwzQRNgEdcE9ewr85ytYilRVTP6/JKWqpw3Se12j5TMeLa+GpRjTX4XPnDxn4U+N+q2954z/aM+Kt2unpGsk1rczCOOMHlVSMcLnHGOSK+pw+dYKrbDZZQTb0va7+97eqt26nyOYZnmOOi+eo7W1OF+DvgyL9obxvf/EzVtW/sD4ceANhl1WQlUMzgiNBx8zsecele1m81w9lkMHTh7TF4jp5Lf5FZVl31hqKdox1bZueKPAWlNpV1rvwj8aalcTWu+VtW0+0Nv5as2NzlBuwSQMucciubAV8a6ns8VQTgkuZaySWi66LV9t2j6SthqWCw3Nhar5n1Wn3Hn2qaPJr2iyahd3MkmvaagW+ljywuIsdWPtmvTli/q2JUIpKlLZdmfPwrqjBRb5mt2+pofB3RvhZqs4sdft9Xj0RFafV4NLmZPMX+MsOOM55PYivMzavmdHWm4uo9IuSvbtb5HLXxNSXvQsmz2v4S/Cb4ZeLdSuNY+DCa/pngTTrgHUI4rYkTyuDhHk5Ck7T7/KfQ18zjs4xeCUIZvGNStLrezSW9tPx2VzzqVNvF805LnaPQPj/+0P8AA79mbw//AGYLvTNc8STWbW+l+H4DvitS4wGlc/xc98VOU5VmWe4jnoR5aOt29dP1Z6eGoKrLmm9j5f8AhT4O8YfEq38S+DviH4i1rSfELaj9osmSVlWFe6ArwV7DBr67NswweW1aGJwtOFSly2fdvuXWxtKmlGk1qj7N+FXi/wCDHwN+BWo/AJdJu5YtM0KS/j0qzjO/WdWkXajycZZQSehP4dK/PswxWIzTEutXT5ajtzXsoJbfc
j67C5xluAyv97C75Xou9up+ePjv9mP4+2FnffEj4m2iaDpkkwlkNw4EjBySqqgOTX63l/FHD85QwWCftJpW8tPM/OqWNw0JKEabbfdaHTfs4fCn4ja74bvvGC/GG68LeFoJRvZJSiXMg6fLwCeB1rm4izTLcJiIUPqqq13+C9T0JZlHCy5KafN1PdvCOs/GrwlqmkS+KPip4ql0XUtPlu7HULaJYLSdI2MaMJJEJdPMVgSoIJjcEgivmq2OUoyjRoxi00mm25a+S/z66I+my/iWrSpPnk3y6WT1vbS+j8nbqu257v8ABj9tBvhZ8Er7/hbeoJ4i8UG5xoz3VuUMqE/IQcA4IK84xiuavinWmoUItW3fRW3NY8byVF02rvoz1PS/2jdSutB0L9jX4Z+IoND8e/EV/tvjfxA0asukWp+ZQCwwWA6D8TXy+WYDFZ7iXi8XZYeMrK/V38uhGAnUxyVKc7Sm7tvojzDXvgDYfBf4zaje/Ez9qLW/G3w+imiC6va+JPs8Nvcg9HeH5fvZxjvxXrZy402sLltOHtLtNxje68r3OfNZ4TC4qK9u5RXmZn7SvwL/AGd9V8daRF8OPjX4q1TUdWjWay8Pr4ninjmDjGJh5jlT35APeuHBVM4wWDtKjFxevM4K61t02fk+lns0d+LWWw9hKhq2rpX39V0/p9TrPDH7K2l/ss6bbfGH4jftS/2dN4fmTUrTwQNe3+a4BKowwCM4445reWNqYijbD0I+0lpzcu3ma1qGFwkFiZ1bNaqKepL4Qn0X9sD4lP8AtW/tWeI76W8vQf8AhGvCOk3jQPDAD0dFwW3YGc8EGvDzjNcyw2J/s7AR91/xJ21fo3seDh6v9qZp9YrtqL7bne+JtT/Zd8Z+JNQbVrdvD2p6jY/Zrn+wfE32a9ECjoQpG3gfXjA9KMFhMyXJSw7+FOXv2t7qb3lo3ZaK929Em2kfRyxvD9Cna0k2raN3+Z2X7CHwv/Z2+D1nqGmfs4+K5JbSUS3Umga3qBklvbzoGWRjy33R/wABFRjuK87y2ssXmVLnTstFZJfI9Lh/H4XLas6mEd1KOsZPd9DpP2bPi58X9W17xx8Sv2mNLm8NQy6mNP0Hw9fTBYo0TgGM4wzM3OfoK63xZgJZhCjRnzQcU27Pd9D0uG88xM8ZXr4u8YvaLvZeh6xH8UdKsUmS+04CUxJNhG2lo275HXFfRYbHYWq3yb+h9dTzjCV0+Um0bxf4b8fMt74fsAEt4zHMsvLmXJBOOoAxiut1G1d6I76VejUhoxms6JP5UiXkQO5NyhVAOOx5z7U6cufS935HRFNQuloU/Dlzp+kpcy+JdPm1EWVld3QH2+G3Fw6JuSJ5nwIlP8TnOBzU18TLARUpK669Dgx1XEU6V6PxX6nyd8Ufg3/wUS/az8R3em+P9LtvhZ4HtZ0Wc3F2BYWyeZ1CRlptUkwRgPsjyeRiuihjMG5JRd2+i3fz6fK79D5NzzbGV5Uqit530tY+zv2dfhb8Nvg18LNE+Cng+0nstG067ka61262vdX1xM5eW4eCMKsKsxJEUYCoCAB0rRV0oOpJKKTS1evlu7vbV6+bu1f0suw9bLsO6cG5W2u/1Oqmla2u57FFdTFK0ZZxt3gE4bBHQ9a3w9eGIXus+hp05zpKbW6I0urgLsZiVAxhmGDXX7KfYyvHoyRHtJR8yqpYdQ3QU/Yz7BzjhFH94sCpHTfkfWj2M+zBSGyJBMeCAcYGGzQ6M30HzEZjkBy0YcEdG7Co9jJdw5kKsbcmRQB2w1HsZ9ilIa7wjIdUYkcBj0o9jLsPmuNV4t2Y/lyOqvjpUOjLsx30HbbqRSDLwecFsih0pbal80SKRH4YxpwPugDGalUpxe34Fb6laWz3ks0KnIxhl6f40pz5dzTklFXex4l8fPGH7buk/E2Dwl+zX+y1b+KtCGji6vvEN1clVjl3OGgC5GSAFOO+6sHTqVqTlSl719rXPJxeNq0qyhBKz63R852PhH/gvN8evH/lada2nw+0y43tbJqFtBaWsKr821wyvLM21T8oAI4JPWvQw9DL7ezqtuXXW33f0zz44vMqbk9l0as/vPq34M2Px6Hw10y0+PP9mah4sgjYatdeHbNltG5O3aCOuMDPc815LdCnUbpP3fM9zCOvLDr27Tl5HSnQNY4zpbqWGT5p6+3NZe1jJaM74030RUm8O62JQzXMEGQdxJY/oOKlSctg9lJ6pMbHoi3AZ28Tc5+aNI8Ac+9dCoVN9TJzcXtYdJ4YtGhZ5dRuHLZbCHHH1xWU2lKzZUanN0K7ab4dtNzW/nDLN8s9wcgg+g7VpCE5arYJuUWPNpYu+UtVLN1bkntyfWlJcj94lVU3uRvb2EQHmW+1AMcYBoi1L4WaKM5apEMt1YWwf7LsdjIYwSgwg/vE9z9K2VCpfVEO6ZV+2WO4uiRFuhYgDPT862VCXZkN6jjPPISYrMH5chQB8o/ClKm4LUS1ZEJLxoCIbYYUYBZxgkdiT+VEKc5r3RzhOC1RXtb/AF0xtHe3dtD5iEMkQDDHbBIFV9XqdUzntrdhDFaxo8Zuz0wTv+8etDoztsXFpvQhupNFteLq8Ve/zSYx9KXspPZFNyXQyb/xT4asic6hG2B8x35yf61aw872aG7qOxly/EbS0RmjunJOCdq+3bNW6E+lzmc+xQvvHUlxI0lhYMzbcbjgZGf/ANdSsPNO9tfQXMZ0mteKbuNvLtII1bliRknIo9jNuzJ51czpm8V3HEusNEScERKoI/OtFQl1TK8yF9KaYSvc69dyNn5w9wcZ+i+1J03HoV7VRWpTudF02H5ZYlfKnJcE8dO/tWXNG4tKivEgj/sOzRg1uY1TGNigDp2/GrUZS2RPPyuxTuta0a3yjw7yQSRIafsalrWE3cqXPi+1CHZbxnOSORkCrVGpbYlszLnxvKzkKI1GM70weeuP6U/Y1OwJ2ZQuPGRkZxHKoZz820YzVU6coSvYcp8qbP/Z",
+              "text/plain": [
+                ""
+              ]
+            },
+            "execution_count": 15,
+            "metadata": {
+              "image/jpeg": {
+                "height": 256,
+                "width": 256
+              }
+            },
+            "output_type": "execute_result"
+          }
+        ],
+        "source": [
+          "!curl -O https://raw.githubusercontent.com/meta-llama/llama-models/refs/heads/main/Llama_Repo.jpeg\n",
+          "\n",
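+          "# Preview the downloaded image inline to confirm the download succeeded.\n",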
+          "from IPython.display import Image\n",
+          "Image(\"Llama_Repo.jpeg\", width=256, height=256)"
+        ]
+      },
+      {
+        "cell_type": "code",
+        "execution_count": 16,
+        "id": "e1450ecc",
+        "metadata": {},
+        "outputs": [],
+        "source": [
+          "import base64\n",
+          "def encode_image(image_path):\n",
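+          "    # Read the image bytes and wrap them in a base64 data URL so the\n",
+          "    # image can be sent inline with the chat request.\n",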
+          "    with open(image_path, \"rb\") as image_file:\n",
+          "        base64_string = base64.b64encode(image_file.read()).decode(\"utf-8\")\n",
+          "        base64_url = f\"data:image/jpeg;base64,{base64_string}\"\n",
+          "        return base64_url"
+        ]
+      },
+      {
+        "cell_type": "code",
+        "execution_count": 18,
+        "id": "d7914894",
+        "metadata": {},
+        "outputs": [
+          {
+            "name": "stdout",
+            "output_type": "stream",
+            "text": [
+              "The image features three llamas, each with a distinct color. The llama on the left is white, the middle one is purple, and the one on the right is also white but wears a blue party hat.\n",
+              "\n",
+              "To determine the number of different colors present, we can count the unique hues:\n",
+              "\n",
+              "1. White (two llamas)\n",
+              "2. Purple (one llama)\n",
+              "3. Blue (party hat)\n",
+              "\n",
+              "Therefore, there are 3 different colors visible in the image: white, purple, and blue.\n"
+            ]
+          }
+        ],
+        "source": [
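+          "# A single user message can mix typed content items: here an image\n",
+          "# (passed as a base64 data URL) and a text question about that image.\n",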
+          "response = client.inference.chat_completion(\n",
+          "    messages=[\n",
+          "        {\n",
+          "            \"role\": \"user\",\n",
+          "            \"content\": [\n",
+          "                {\n",
+          "                    \"type\": \"image\",\n",
+          "                    \"image\": {\n",
+          "                        \"url\": {\n",
+          "                            \"uri\": encode_image(\"Llama_Repo.jpeg\")\n",
+          "                        }\n",
+          "                    }\n",
+          "                },\n",
+          "                {\n",
+          "                    \"type\": \"text\",\n",
+          "                    \"text\": \"How many different colors are those llamas? What are those colors?\",\n",
+          "                }\n",
+          "            ]\n",
+          "        }\n",
+          "    ],\n",
+          "    model_id=model_id,\n",
+          "    stream=False,\n",
+          ")\n",
+          "\n",
+          "print(response.completion_message.content)"
+        ]
+      },
+      {
+        "cell_type": "markdown",
+        "id": "8cf0d555",
+        "metadata": {
+          "id": "8cf0d555"
+        },
+        "source": [
+          "### 2.4 Have a conversation\n",
+          "\n",
+          "Maintaining a conversation history allows the model to retain context from previous interactions. Use a list to accumulate messages, enabling continuity throughout the chat session."
+        ]
+      },
+      {
+        "cell_type": "code",
+        "execution_count": 19,
+        "id": "3fdf9df6",
+        "metadata": {
+          "id": "3fdf9df6"
+        },
+        "outputs": [
+          {
+            "name": "stdout",
+            "output_type": "stream",
+            "text": [
+              "\u001b[36m> Response: The most famous Prime Minister of England during World War 2 was Winston Churchill. He served as the Prime Minister of the United Kingdom from 1940 to 1945, and again from 1951 to 1955. Churchill is widely regarded as one of the greatest wartime leaders in history, known for his leadership, oratory skills, and unwavering resolve during the war.\n",
+              "\n",
+              "Churchill played a crucial role in rallying the British people during the war, and his speeches, such as the \"We shall fight on the beaches\" and \"Their finest hour\" speeches, are still remembered and celebrated today. He worked closely with other Allied leaders, including US President Franklin D. Roosevelt and Soviet leader Joseph Stalin, to coordinate the war effort and ultimately secure the defeat of Nazi Germany.\n",
+              "\n",
+              "Churchill's leadership and legacy have endured long after the war, and he remains one of the most iconic and influential figures in British history.\u001b[0m\n",
+              "\u001b[36m> Response: Winston Churchill was known for his many memorable quotes, but one of his most famous is:\n",
+              "\n",
+              "**\"We shall fight on the beaches, we shall fight on the landing grounds, we shall fight in the fields and in the streets, we shall fight in the hills; we shall never surrender.\"**\n",
+              "\n",
+              "This quote is from his speech to the House of Commons on June 4, 1940, during the early stages of World War II, when Nazi Germany was threatening to invade Britain. The speech is known as the \"We Shall Fight on the Beaches\" speech, and it's considered one of the greatest speeches of the 20th century.\n",
+              "\n",
+              "However, if I had to pick a single, even more concise quote, it would be:\n",
+              "\n",
+              "**\"Blood, toil, tears, and sweat.\"**\n",
+              "\n",
+              "This was the opening phrase of his first speech as Prime Minister to the House of Commons on May 13, 1940, in which he said:\n",
+              "\n",
+              "\"I say to the House as I said to those who have joined this Government, I have nothing to offer but blood, toil, tears, and sweat. We have before us an ordeal of the most grievous kind.\"\n",
+              "\n",
+              "This quote has become synonymous with Churchill's leadership and resolve during the war.\u001b[0m\n"
+            ]
+          }
+        ],
+        "source": [
+          "from termcolor import cprint\n",
+          "\n",
+          "questions = [\n",
+          "    \"Who was the most famous PM of England during world war 2 ?\",\n",
+          "    \"What was his most famous quote ?\"\n",
+          "]\n",
+          "\n",
+          "\n",
+          "def chat_loop():\n",
+          "    conversation_history = []\n",
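+          "    # The full history is sent on every turn so the model keeps the\n",
+          "    # context of earlier questions and answers.\n",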
+          "    while len(questions) > 0:\n",
+          "        user_input = questions.pop(0)\n",
+          "        if user_input.lower() in [\"exit\", \"quit\", \"bye\"]:\n",
+          "            cprint(\"Ending conversation. Goodbye!\", \"yellow\")\n",
+          "            break\n",
+          "\n",
+          "        user_message = {\"role\": \"user\", \"content\": user_input}\n",
+          "        conversation_history.append(user_message)\n",
+          "\n",
+          "        response = client.inference.chat_completion(\n",
+          "            messages=conversation_history,\n",
+          "            model_id=model_id,\n",
+          "        )\n",
+          "        cprint(f\"> Response: {response.completion_message.content}\", \"cyan\")\n",
+          "\n",
+          "        assistant_message = {\n",
+          "            \"role\": \"assistant\",\n",
+          "            \"content\": response.completion_message.content,\n",
+          "            \"stop_reason\": response.completion_message.stop_reason,\n",
+          "        }\n",
+          "        conversation_history.append(assistant_message)\n",
+          "\n",
+          "\n",
+          "chat_loop()\n"
+        ]
+      },
+      {
+        "cell_type": "markdown",
+        "id": "72e5111e",
+        "metadata": {
+          "id": "72e5111e"
+        },
+        "source": [
+          "Here is an example you can run to have a conversation yourself.\n",
+          "Remember to type `quit` or `exit` after you are done chatting."
+        ]
+      },
+      {
+        "cell_type": "code",
+        "execution_count": 35,
+        "id": "9496f75c",
+        "metadata": {
+          "colab": {
+            "base_uri": "https://localhost:8080/"
+          },
+          "id": "9496f75c",
+          "outputId": "7d93a4cf-a5d4-4741-b6eb-6bce3a27ff66"
+        },
+        "outputs": [
+          {
+            "name": "stdout",
+            "output_type": "stream",
+            "text": [
+              "\u001b[36m> Response: Hello! How are you today? Is there something I can help you with or would you like to chat?\u001b[0m\n",
+              "\u001b[33mEnding conversation. Goodbye!\u001b[0m\n"
+            ]
+          }
+        ],
+        "source": [
+          "# NBVAL_SKIP\n",
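+          "# (NBVAL_SKIP tells nbval to skip this cell during automated notebook\n",
+          "# tests, since it waits for interactive user input)\n",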
+          "from termcolor import cprint\n",
+          "\n",
+          "def chat_loop():\n",
+          "    conversation_history = []\n",
+          "    while True:\n",
+          "        user_input = input(\"User> \")\n",
+          "        if user_input.lower() in [\"exit\", \"quit\", \"bye\"]:\n",
+          "            cprint(\"Ending conversation. Goodbye!\", \"yellow\")\n",
+          "            break\n",
+          "\n",
+          "        user_message = {\"role\": \"user\", \"content\": user_input}\n",
+          "        conversation_history.append(user_message)\n",
+          "\n",
+          "        response = client.inference.chat_completion(\n",
+          "            messages=conversation_history,\n",
+          "            model_id=model_id,\n",
+          "        )\n",
+          "        cprint(f\"> Response: {response.completion_message.content}\", \"cyan\")\n",
+          "\n",
+          "        assistant_message = {\n",
+          "            \"role\": \"assistant\",\n",
+          "            \"content\": response.completion_message.content,\n",
+          "            \"stop_reason\": response.completion_message.stop_reason,\n",
+          "        }\n",
+          "        conversation_history.append(assistant_message)\n",
+          "\n",
+          "\n",
+          "chat_loop()\n"
+        ]
+      }
+    ],
+    "metadata": {
+      "accelerator": "GPU",
+      "colab": {
+        "gpuType": "T4",
+        "provenance": []
+      },
+      "kernelspec": {
+        "display_name": "l4",
+        "language": "python",
+        "name": "python3"
+      },
+      "language_info": {
+        "codemirror_mode": {
+          "name": "ipython",
+          "version": 3
+        },
+        "file_extension": ".py",
+        "mimetype": "text/x-python",
+        "name": "python",
+        "nbconvert_exporter": "python",
+        "pygments_lexer": "ipython3",
+        "version": "3.10.16"
+      }
+    },
+    "nbformat": 4,
+    "nbformat_minor": 5
+  }
diff --git a/docs/make.bat b/docs/make.bat
index 32bb24529..954237b9b 100644
--- a/docs/make.bat
+++ b/docs/make.bat
@@ -1,35 +1,35 @@
-@ECHO OFF
-
-pushd %~dp0
-
-REM Command file for Sphinx documentation
-
-if "%SPHINXBUILD%" == "" (
-	set SPHINXBUILD=sphinx-build
-)
-set SOURCEDIR=.
-set BUILDDIR=_build
-
-%SPHINXBUILD% >NUL 2>NUL
-if errorlevel 9009 (
-	echo.
-	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
-	echo.installed, then set the SPHINXBUILD environment variable to point
-	echo.to the full path of the 'sphinx-build' executable. Alternatively you
-	echo.may add the Sphinx directory to PATH.
-	echo.
-	echo.If you don't have Sphinx installed, grab it from
-	echo.https://www.sphinx-doc.org/
-	exit /b 1
-)
-
-if "%1" == "" goto help
-
-%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
-goto end
-
-:help
-%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
-
-:end
-popd
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+	set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=.
+set BUILDDIR=_build
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+	echo.
+	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+	echo.installed, then set the SPHINXBUILD environment variable to point
+	echo.to the full path of the 'sphinx-build' executable. Alternatively you
+	echo.may add the Sphinx directory to PATH.
+	echo.
+	echo.If you don't have Sphinx installed, grab it from
+	echo.https://www.sphinx-doc.org/
+	exit /b 1
+)
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd
diff --git a/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb b/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb
index 5de7f715e..93f78d268 100644
--- a/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb
+++ b/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb
@@ -38,12 +38,8 @@
       "cell_type": "code",
       "execution_count": null,
       "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
         "collapsed": true,
-        "id": "O9pGVlPIjpix",
-        "outputId": "e1fbe723-ae31-4630-eb80-4c4f6476d56f"
+        "id": "O9pGVlPIjpix"
       },
       "outputs": [],
       "source": [
@@ -55,12 +51,8 @@
       "cell_type": "code",
       "execution_count": null,
       "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
         "collapsed": true,
-        "id": "JQpLUSNjlGAM",
-        "outputId": "2f7fec97-5511-4cae-d51e-6d262fbca19c"
+        "id": "JQpLUSNjlGAM"
       },
       "outputs": [],
       "source": [
@@ -70,7 +62,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 1,
+      "execution_count": null,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
@@ -337,9 +329,6 @@
               "    provider_id: tavily-search\n",
               "    provider_type: remote::tavily-search\n",
               "  - config: {}\n",
-              "    provider_id: code-interpreter\n",
-              "    provider_type: inline::code-interpreter\n",
-              "  - config: {}\n",
               "    provider_id: rag-runtime\n",
               "    provider_type: inline::rag-runtime\n",
               "  - config: {}\n",
@@ -378,10 +367,6 @@
               "  toolgroup_id: builtin::rag\n",
               "- args: null\n",
               "  mcp_endpoint: null\n",
-              "  provider_id: code-interpreter\n",
-              "  toolgroup_id: builtin::code_interpreter\n",
-              "- args: null\n",
-              "  mcp_endpoint: null\n",
               "  provider_id: wolfram-alpha\n",
               "  toolgroup_id: builtin::wolfram_alpha\n",
               "vector_dbs: []\n",
@@ -617,9 +602,6 @@
               "    provider_id: tavily-search\n",
               "    provider_type: remote::tavily-search\n",
               "  - config: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
-              "    provider_id: code-interpreter\n",
-              "    provider_type: inlin\u001b[1;92me::c\u001b[0mode-interpreter\n",
-              "  - config: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
               "    provider_id: rag-runtime\n",
               "    provider_type: inline::rag-runtime\n",
               "  - config: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
@@ -658,10 +640,6 @@
               "  toolgroup_id: builtin::rag\n",
               "- args: null\n",
               "  mcp_endpoint: null\n",
-              "  provider_id: code-interpreter\n",
-              "  toolgroup_id: builtin::code_interpreter\n",
-              "- args: null\n",
-              "  mcp_endpoint: null\n",
               "  provider_id: wolfram-alpha\n",
               "  toolgroup_id: builtin::wolfram_alpha\n",
               "vector_dbs: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n",
@@ -715,7 +693,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 2,
+      "execution_count": null,
       "metadata": {
         "id": "TC_IwIAQo4q-"
       },
@@ -728,116 +706,10 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 3,
+      "execution_count": null,
       "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 305,
-          "referenced_widgets": [
-            "feb82e061ee44283b4a46be858ef4cd7",
-            "78a2d2d4ee3f42f3be42ef4baa298561",
-            "ba5e6ca09f174ef3a348453cf5cfc24a",
-            "74b58e4647644c9daf9af488942fdaf4",
-            "d56e218958a041e286e80f24e400ab0b",
-            "cab80632b7564a9eb59583e09573c1ee",
-            "10c0d50d7c204de0b4c8e8f4d3ec0af5",
-            "626ef2f811ae4e119a0e85cebe92b91d",
-            "aef4172d916f40b0ab4ed09104e10f24",
-            "25529e7fd57049d2816d31f696eab1fd",
-            "093bdcb608cf4b4fa37b0032a3915187",
-            "c788d4e9e1e24dca9b6503689df9b631",
-            "d1587e2144bf46299c1bdec3ea96e4e7",
-            "500a072c09da41759cb2c942a16d8429",
-            "9785009392934e3bbb229e8781667cbc",
-            "84570fe2c2a54a068fb9b8cbc8b041a1",
-            "f9e579c58e3f4ae0bbb721dffa33bf0a",
-            "737116977f474ec0b68d88a40fd1086c",
-            "e6d6e516cd03452297d80c36376855dd",
-            "6ae0fadb3aeb4be18a9ab3279fb23145",
-            "fa4800a506ac480984d58933580df086",
-            "117468099dbc42fdaafc08207eaac7ab",
-            "44f585990aa244d8ba61f892dc1ccc1c",
-            "4fc59928a0544f95a4438b37d19ca437",
-            "fb644d47049f495397d0e60597c86ea3",
-            "78632694ff694442bc3fefc2cac2cbf5",
-            "083fd2549abd4b03bd41d8b92ec28f42",
-            "611d6472a58d419583acc416767a4c90",
-            "98c5ce434cff454eaaa3f0fd3498183a",
-            "3d0344a9cc744e369da1b6b7ea1b3be8",
-            "c452ccbf47a44073aee710175f707a7d",
-            "0218397c573e4b28bfb4ffa66464d50f",
-            "9b01bcd6e5174be2af19f457047017c8",
-            "4fed5720f30b4b3cbbc606a4f25e223b",
-            "6fa866b9971542739b0ed26d90ceac80",
-            "fe7553b513954cc68c427b5d9d260b33",
-            "4bc266d49a6741a88350e029d101425b",
-            "da57445f98e7427589962836c2b4287e",
-            "ad1fb86cc1f94fd9911eda03cf4a3783",
-            "fdefb51ad4c4418b98c5826126558011",
-            "179d41b80dc841e8a440482516b8bca5",
-            "22b1ecd2eff14770bcfb0c62d3d4213f",
-            "47f876cf41484d55b645e1e99337423a",
-            "340fbbb4982c460992c88885e79b47db",
-            "9659140487ca4d3ea799196d2c1ecf61",
-            "52150fd494d24eea89b5232077509355",
-            "04acde771d0a46699e1de07d9733d1a3",
-            "7b98103300814f3caea84266263b95a2",
-            "75f06408071c494f934bb909b84110d1",
-            "b09b2690894749339a9172e5ad0a9b75",
-            "cbed38801163438d891879b756f5baab",
-            "399a6417b23e4593bb244ec3abb6b46d",
-            "53a321f36b0d4e08a74a5bcfbd04434b",
-            "b8c0c8aaac0d4032bf5c673a43d084ab",
-            "d1f32499fa3f4795b92361637e23a9bb",
-            "c06f9a090fb54c74b947634bf6d11fa8",
-            "82991dcc80f14af9bd2e95f705980676",
-            "cd832e3842b945aabbb327856053f261",
-            "93ee645d54f34acdb0d15092d4a6f0d1",
-            "b77fe05bbcf84cdc8ef85b264ccd35f6",
-            "e17d286a965a49cfb8d5bf885865cb1e",
-            "ca015c1a0c1449e68edb282462435a3f",
-            "2932b06afde9468a976eb6bfb072b80e",
-            "d027c807ddc04f89bec41dc05fde7718",
-            "4ff3a6aaf706460bbba01b248b93000e",
-            "bfd75a39f0154c30adbaad1e2ca0f1e2",
-            "4f788a7920c346f3b42900825bd6711a",
-            "8e9358ec7d474808bb96c13e13489c67",
-            "f0dfeee2a8d64dedbc8ef55ad4e69932",
-            "9437b707bf1a4847a50aafeb4252dab5",
-            "f255707788704a76bd1651f26a22402d",
-            "3b70fa4e43ef4951862e119378c3c501",
-            "6c0a6a7fa8ca4e1c961a36305f0e7638",
-            "201bd914f9884e46b8e6df9d9900a6e8",
-            "f53b7ada01084e73bba6e14a95e2a534",
-            "d2029292327b488db02fd123ee2b75af",
-            "3e26bc24a3e44b4582f57913bdf98de4",
-            "9d2b6eabf7e14436b72bbf374b4a2a0a",
-            "b5d7cb5a6157449a850ef0e12e3d3eb7",
-            "c245d316bf9e44dabe5bfd1e47fc8d2e",
-            "963cf422ca894d82b0dd94c6165d41bf",
-            "78d0e2aa93674bbeb42bff87a23cce9b",
-            "12c6f1180eeb4e9eb9037ea5dd24ec8e",
-            "017a81d7160240a398947545963856f5",
-            "1cf8eeb8d81c4e8a8e95dd43296a78b9",
-            "5b0b5a3f79e94c51aae48fe0dd34ba0e",
-            "f5b34a743ce54fb591f25b04a2651d65",
-            "dec6399e2c5341aead66e1674d3e6c72",
-            "24e48376a72940679989a39a40bbe7f6",
-            "484df732051540859bc7ac9cecadc83c",
-            "4b33b1db50c34a2fa957d81a71a2a47f",
-            "e51d501e2f994baba40345ad632eabee",
-            "631a85e420b64e8cb6915af59c5ce08a",
-            "70af9cb2838c4a92bd67f8cb5c98d97f",
-            "158115266c284c4f8dbce3586151cbf1",
-            "ce5019b36cde44c58c5f596dbb59a2f8",
-            "b90d660ca8584ba1815a3c66b420c079",
-            "7c4d1de626784a59a7e0a33c24086186",
-            "21cf0e35ecd845a8b5e7c5ce241cf177"
-          ]
-        },
         "collapsed": true,
-        "id": "DJkmoG2kq1_P",
-        "outputId": "8493ee59-c6ff-4bb6-d787-f295944db1cf"
+        "id": "DJkmoG2kq1_P"
       },
       "outputs": [],
       "source": [
@@ -862,7 +734,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 4,
+      "execution_count": null,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/",
@@ -963,7 +835,7 @@
         "\n",
         "client.benchmarks.register(\n",
         "    benchmark_id=\"meta-reference::mmmu\",\n",
-        "    # Note: we can use any value as `dataset_id` because we'll be using the `evaluate_rows` API which accepts the \n",
+        "    # Note: we can use any value as `dataset_id` because we'll be using the `evaluate_rows` API which accepts the\n",
         "    # `input_rows` argument and does not fetch data from the dataset.\n",
         "    dataset_id=f\"mmmu-{subset}-{split}\",\n",
         "    # Note: for the same reason as above, we can use any value as `scoring_functions`.\n",
@@ -1008,7 +880,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 5,
+      "execution_count": null,
       "metadata": {
         "id": "HXmZf3Ymw-aX"
       },
@@ -1028,7 +900,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 6,
+      "execution_count": null,
       "metadata": {
         "id": "Gc8azb4Rxr5J"
       },
@@ -1042,7 +914,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 7,
+      "execution_count": null,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/",
@@ -1182,7 +1054,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 27,
+      "execution_count": null,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/",
@@ -1307,7 +1179,9 @@
     {
       "cell_type": "code",
       "execution_count": null,
-      "metadata": {},
+      "metadata": {
+        "id": "lxc9-eXYK5Av"
+      },
       "outputs": [],
       "source": []
     }
@@ -1336,3088 +1210,6 @@
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
       "version": "3.10.16"
-    },
-    "widgets": {
-      "application/vnd.jupyter.widget-state+json": {
-        "017a81d7160240a398947545963856f5": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "0218397c573e4b28bfb4ffa66464d50f": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "04acde771d0a46699e1de07d9733d1a3": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "FloatProgressModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_399a6417b23e4593bb244ec3abb6b46d",
-            "max": 453677660,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_53a321f36b0d4e08a74a5bcfbd04434b",
-            "value": 453677660
-          }
-        },
-        "083fd2549abd4b03bd41d8b92ec28f42": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "093bdcb608cf4b4fa37b0032a3915187": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "10c0d50d7c204de0b4c8e8f4d3ec0af5": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "117468099dbc42fdaafc08207eaac7ab": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "12c6f1180eeb4e9eb9037ea5dd24ec8e": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "158115266c284c4f8dbce3586151cbf1": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "179d41b80dc841e8a440482516b8bca5": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "1cf8eeb8d81c4e8a8e95dd43296a78b9": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "201bd914f9884e46b8e6df9d9900a6e8": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "21cf0e35ecd845a8b5e7c5ce241cf177": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "22b1ecd2eff14770bcfb0c62d3d4213f": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "ProgressStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "24e48376a72940679989a39a40bbe7f6": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HBoxModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_484df732051540859bc7ac9cecadc83c",
-              "IPY_MODEL_4b33b1db50c34a2fa957d81a71a2a47f",
-              "IPY_MODEL_e51d501e2f994baba40345ad632eabee"
-            ],
-            "layout": "IPY_MODEL_631a85e420b64e8cb6915af59c5ce08a"
-          }
-        },
-        "25529e7fd57049d2816d31f696eab1fd": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "2932b06afde9468a976eb6bfb072b80e": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "340fbbb4982c460992c88885e79b47db": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "399a6417b23e4593bb244ec3abb6b46d": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "3b70fa4e43ef4951862e119378c3c501": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "3d0344a9cc744e369da1b6b7ea1b3be8": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "3e26bc24a3e44b4582f57913bdf98de4": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "44f585990aa244d8ba61f892dc1ccc1c": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HBoxModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_4fc59928a0544f95a4438b37d19ca437",
-              "IPY_MODEL_fb644d47049f495397d0e60597c86ea3",
-              "IPY_MODEL_78632694ff694442bc3fefc2cac2cbf5"
-            ],
-            "layout": "IPY_MODEL_083fd2549abd4b03bd41d8b92ec28f42"
-          }
-        },
-        "47f876cf41484d55b645e1e99337423a": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "484df732051540859bc7ac9cecadc83c": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_70af9cb2838c4a92bd67f8cb5c98d97f",
-            "placeholder": "​",
-            "style": "IPY_MODEL_158115266c284c4f8dbce3586151cbf1",
-            "value": "Generating test split: 100%"
-          }
-        },
-        "4b33b1db50c34a2fa957d81a71a2a47f": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "FloatProgressModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_ce5019b36cde44c58c5f596dbb59a2f8",
-            "max": 287,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_b90d660ca8584ba1815a3c66b420c079",
-            "value": 287
-          }
-        },
-        "4bc266d49a6741a88350e029d101425b": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_47f876cf41484d55b645e1e99337423a",
-            "placeholder": "​",
-            "style": "IPY_MODEL_340fbbb4982c460992c88885e79b47db",
-            "value": " 461M/461M [00:11<00:00, 31.2MB/s]"
-          }
-        },
-        "4f788a7920c346f3b42900825bd6711a": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HBoxModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_8e9358ec7d474808bb96c13e13489c67",
-              "IPY_MODEL_f0dfeee2a8d64dedbc8ef55ad4e69932",
-              "IPY_MODEL_9437b707bf1a4847a50aafeb4252dab5"
-            ],
-            "layout": "IPY_MODEL_f255707788704a76bd1651f26a22402d"
-          }
-        },
-        "4fc59928a0544f95a4438b37d19ca437": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_611d6472a58d419583acc416767a4c90",
-            "placeholder": "​",
-            "style": "IPY_MODEL_98c5ce434cff454eaaa3f0fd3498183a",
-            "value": "validation-00000-of-00001.parquet: 100%"
-          }
-        },
-        "4fed5720f30b4b3cbbc606a4f25e223b": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HBoxModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_6fa866b9971542739b0ed26d90ceac80",
-              "IPY_MODEL_fe7553b513954cc68c427b5d9d260b33",
-              "IPY_MODEL_4bc266d49a6741a88350e029d101425b"
-            ],
-            "layout": "IPY_MODEL_da57445f98e7427589962836c2b4287e"
-          }
-        },
-        "4ff3a6aaf706460bbba01b248b93000e": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "500a072c09da41759cb2c942a16d8429": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "FloatProgressModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_e6d6e516cd03452297d80c36376855dd",
-            "max": 29453850,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_6ae0fadb3aeb4be18a9ab3279fb23145",
-            "value": 29453850
-          }
-        },
-        "52150fd494d24eea89b5232077509355": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_b09b2690894749339a9172e5ad0a9b75",
-            "placeholder": "​",
-            "style": "IPY_MODEL_cbed38801163438d891879b756f5baab",
-            "value": "test-00001-of-00003.parquet: 100%"
-          }
-        },
-        "53a321f36b0d4e08a74a5bcfbd04434b": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "ProgressStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "5b0b5a3f79e94c51aae48fe0dd34ba0e": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "ProgressStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "611d6472a58d419583acc416767a4c90": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "626ef2f811ae4e119a0e85cebe92b91d": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "631a85e420b64e8cb6915af59c5ce08a": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "6ae0fadb3aeb4be18a9ab3279fb23145": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "ProgressStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "6c0a6a7fa8ca4e1c961a36305f0e7638": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "6fa866b9971542739b0ed26d90ceac80": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_ad1fb86cc1f94fd9911eda03cf4a3783",
-            "placeholder": "​",
-            "style": "IPY_MODEL_fdefb51ad4c4418b98c5826126558011",
-            "value": "test-00000-of-00003.parquet: 100%"
-          }
-        },
-        "70af9cb2838c4a92bd67f8cb5c98d97f": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "737116977f474ec0b68d88a40fd1086c": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "74b58e4647644c9daf9af488942fdaf4": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_25529e7fd57049d2816d31f696eab1fd",
-            "placeholder": "​",
-            "style": "IPY_MODEL_093bdcb608cf4b4fa37b0032a3915187",
-            "value": " 36.0k/36.0k [00:00<00:00, 1.29MB/s]"
-          }
-        },
-        "75f06408071c494f934bb909b84110d1": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "78632694ff694442bc3fefc2cac2cbf5": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_0218397c573e4b28bfb4ffa66464d50f",
-            "placeholder": "​",
-            "style": "IPY_MODEL_9b01bcd6e5174be2af19f457047017c8",
-            "value": " 165M/165M [00:03<00:00, 42.9MB/s]"
-          }
-        },
-        "78a2d2d4ee3f42f3be42ef4baa298561": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_cab80632b7564a9eb59583e09573c1ee",
-            "placeholder": "​",
-            "style": "IPY_MODEL_10c0d50d7c204de0b4c8e8f4d3ec0af5",
-            "value": "README.md: 100%"
-          }
-        },
-        "78d0e2aa93674bbeb42bff87a23cce9b": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "7b98103300814f3caea84266263b95a2": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_b8c0c8aaac0d4032bf5c673a43d084ab",
-            "placeholder": "​",
-            "style": "IPY_MODEL_d1f32499fa3f4795b92361637e23a9bb",
-            "value": " 454M/454M [00:11<00:00, 40.4MB/s]"
-          }
-        },
-        "7c4d1de626784a59a7e0a33c24086186": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "82991dcc80f14af9bd2e95f705980676": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_e17d286a965a49cfb8d5bf885865cb1e",
-            "placeholder": "​",
-            "style": "IPY_MODEL_ca015c1a0c1449e68edb282462435a3f",
-            "value": "test-00002-of-00003.parquet: 100%"
-          }
-        },
-        "84570fe2c2a54a068fb9b8cbc8b041a1": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "8e9358ec7d474808bb96c13e13489c67": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_3b70fa4e43ef4951862e119378c3c501",
-            "placeholder": "​",
-            "style": "IPY_MODEL_6c0a6a7fa8ca4e1c961a36305f0e7638",
-            "value": "Generating dev split: 100%"
-          }
-        },
-        "93ee645d54f34acdb0d15092d4a6f0d1": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_4ff3a6aaf706460bbba01b248b93000e",
-            "placeholder": "​",
-            "style": "IPY_MODEL_bfd75a39f0154c30adbaad1e2ca0f1e2",
-            "value": " 471M/471M [00:11<00:00, 41.5MB/s]"
-          }
-        },
-        "9437b707bf1a4847a50aafeb4252dab5": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_d2029292327b488db02fd123ee2b75af",
-            "placeholder": "​",
-            "style": "IPY_MODEL_3e26bc24a3e44b4582f57913bdf98de4",
-            "value": " 5/5 [00:00<00:00,  8.03 examples/s]"
-          }
-        },
-        "963cf422ca894d82b0dd94c6165d41bf": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_f5b34a743ce54fb591f25b04a2651d65",
-            "placeholder": "​",
-            "style": "IPY_MODEL_dec6399e2c5341aead66e1674d3e6c72",
-            "value": " 30/30 [00:03<00:00,  8.23 examples/s]"
-          }
-        },
-        "9659140487ca4d3ea799196d2c1ecf61": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HBoxModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_52150fd494d24eea89b5232077509355",
-              "IPY_MODEL_04acde771d0a46699e1de07d9733d1a3",
-              "IPY_MODEL_7b98103300814f3caea84266263b95a2"
-            ],
-            "layout": "IPY_MODEL_75f06408071c494f934bb909b84110d1"
-          }
-        },
-        "9785009392934e3bbb229e8781667cbc": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_fa4800a506ac480984d58933580df086",
-            "placeholder": "​",
-            "style": "IPY_MODEL_117468099dbc42fdaafc08207eaac7ab",
-            "value": " 29.5M/29.5M [00:00<00:00, 36.5MB/s]"
-          }
-        },
-        "98c5ce434cff454eaaa3f0fd3498183a": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "9b01bcd6e5174be2af19f457047017c8": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "9d2b6eabf7e14436b72bbf374b4a2a0a": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HBoxModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_b5d7cb5a6157449a850ef0e12e3d3eb7",
-              "IPY_MODEL_c245d316bf9e44dabe5bfd1e47fc8d2e",
-              "IPY_MODEL_963cf422ca894d82b0dd94c6165d41bf"
-            ],
-            "layout": "IPY_MODEL_78d0e2aa93674bbeb42bff87a23cce9b"
-          }
-        },
-        "ad1fb86cc1f94fd9911eda03cf4a3783": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "aef4172d916f40b0ab4ed09104e10f24": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "ProgressStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "b09b2690894749339a9172e5ad0a9b75": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "b5d7cb5a6157449a850ef0e12e3d3eb7": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_12c6f1180eeb4e9eb9037ea5dd24ec8e",
-            "placeholder": "​",
-            "style": "IPY_MODEL_017a81d7160240a398947545963856f5",
-            "value": "Generating validation split: 100%"
-          }
-        },
-        "b77fe05bbcf84cdc8ef85b264ccd35f6": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "b8c0c8aaac0d4032bf5c673a43d084ab": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "b90d660ca8584ba1815a3c66b420c079": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "ProgressStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "ba5e6ca09f174ef3a348453cf5cfc24a": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "FloatProgressModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_626ef2f811ae4e119a0e85cebe92b91d",
-            "max": 36030,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_aef4172d916f40b0ab4ed09104e10f24",
-            "value": 36030
-          }
-        },
-        "bfd75a39f0154c30adbaad1e2ca0f1e2": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "c06f9a090fb54c74b947634bf6d11fa8": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HBoxModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_82991dcc80f14af9bd2e95f705980676",
-              "IPY_MODEL_cd832e3842b945aabbb327856053f261",
-              "IPY_MODEL_93ee645d54f34acdb0d15092d4a6f0d1"
-            ],
-            "layout": "IPY_MODEL_b77fe05bbcf84cdc8ef85b264ccd35f6"
-          }
-        },
-        "c245d316bf9e44dabe5bfd1e47fc8d2e": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "FloatProgressModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_1cf8eeb8d81c4e8a8e95dd43296a78b9",
-            "max": 30,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_5b0b5a3f79e94c51aae48fe0dd34ba0e",
-            "value": 30
-          }
-        },
-        "c452ccbf47a44073aee710175f707a7d": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "ProgressStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "c788d4e9e1e24dca9b6503689df9b631": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HBoxModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_d1587e2144bf46299c1bdec3ea96e4e7",
-              "IPY_MODEL_500a072c09da41759cb2c942a16d8429",
-              "IPY_MODEL_9785009392934e3bbb229e8781667cbc"
-            ],
-            "layout": "IPY_MODEL_84570fe2c2a54a068fb9b8cbc8b041a1"
-          }
-        },
-        "ca015c1a0c1449e68edb282462435a3f": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "cab80632b7564a9eb59583e09573c1ee": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "cbed38801163438d891879b756f5baab": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "cd832e3842b945aabbb327856053f261": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "FloatProgressModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_2932b06afde9468a976eb6bfb072b80e",
-            "max": 470745176,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_d027c807ddc04f89bec41dc05fde7718",
-            "value": 470745176
-          }
-        },
-        "ce5019b36cde44c58c5f596dbb59a2f8": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "d027c807ddc04f89bec41dc05fde7718": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "ProgressStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "d1587e2144bf46299c1bdec3ea96e4e7": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_f9e579c58e3f4ae0bbb721dffa33bf0a",
-            "placeholder": "​",
-            "style": "IPY_MODEL_737116977f474ec0b68d88a40fd1086c",
-            "value": "dev-00000-of-00001.parquet: 100%"
-          }
-        },
-        "d1f32499fa3f4795b92361637e23a9bb": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "d2029292327b488db02fd123ee2b75af": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "d56e218958a041e286e80f24e400ab0b": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "da57445f98e7427589962836c2b4287e": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "dec6399e2c5341aead66e1674d3e6c72": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "e17d286a965a49cfb8d5bf885865cb1e": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "e51d501e2f994baba40345ad632eabee": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_7c4d1de626784a59a7e0a33c24086186",
-            "placeholder": "​",
-            "style": "IPY_MODEL_21cf0e35ecd845a8b5e7c5ce241cf177",
-            "value": " 287/287 [00:23<00:00, 12.48 examples/s]"
-          }
-        },
-        "e6d6e516cd03452297d80c36376855dd": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "f0dfeee2a8d64dedbc8ef55ad4e69932": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "FloatProgressModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_201bd914f9884e46b8e6df9d9900a6e8",
-            "max": 5,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_f53b7ada01084e73bba6e14a95e2a534",
-            "value": 5
-          }
-        },
-        "f255707788704a76bd1651f26a22402d": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "f53b7ada01084e73bba6e14a95e2a534": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "ProgressStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "f5b34a743ce54fb591f25b04a2651d65": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "f9e579c58e3f4ae0bbb721dffa33bf0a": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "fa4800a506ac480984d58933580df086": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "fb644d47049f495397d0e60597c86ea3": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "FloatProgressModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_3d0344a9cc744e369da1b6b7ea1b3be8",
-            "max": 165333397,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_c452ccbf47a44073aee710175f707a7d",
-            "value": 165333397
-          }
-        },
-        "fdefb51ad4c4418b98c5826126558011": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "fe7553b513954cc68c427b5d9d260b33": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "FloatProgressModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_179d41b80dc841e8a440482516b8bca5",
-            "max": 461411018,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_22b1ecd2eff14770bcfb0c62d3d4213f",
-            "value": 461411018
-          }
-        },
-        "feb82e061ee44283b4a46be858ef4cd7": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HBoxModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_78a2d2d4ee3f42f3be42ef4baa298561",
-              "IPY_MODEL_ba5e6ca09f174ef3a348453cf5cfc24a",
-              "IPY_MODEL_74b58e4647644c9daf9af488942fdaf4"
-            ],
-            "layout": "IPY_MODEL_d56e218958a041e286e80f24e400ab0b"
-          }
-        }
-      }
     }
   },
   "nbformat": 4,
diff --git a/docs/notebooks/Llama_Stack_RAG_Lifecycle.ipynb b/docs/notebooks/Llama_Stack_RAG_Lifecycle.ipynb
index 399a3bff1..e70cc3bbe 100644
--- a/docs/notebooks/Llama_Stack_RAG_Lifecycle.ipynb
+++ b/docs/notebooks/Llama_Stack_RAG_Lifecycle.ipynb
@@ -840,7 +840,6 @@
     "    \"memory_optimizations.rst\",\n",
     "    \"chat.rst\",\n",
     "    \"llama3.rst\",\n",
-    "    \"datasets.rst\",\n",
     "    \"qat_finetune.rst\",\n",
     "    \"lora_finetune.rst\",\n",
     "]\n",
@@ -1586,7 +1585,6 @@
     "    \"memory_optimizations.rst\",\n",
     "    \"chat.rst\",\n",
     "    \"llama3.rst\",\n",
-    "    \"datasets.rst\",\n",
     "    \"qat_finetune.rst\",\n",
     "    \"lora_finetune.rst\",\n",
     "]\n",
diff --git a/docs/openapi_generator/generate.py b/docs/openapi_generator/generate.py
index caa4f17ff..9fc375175 100644
--- a/docs/openapi_generator/generate.py
+++ b/docs/openapi_generator/generate.py
@@ -44,7 +44,7 @@ def main(output_dir: str):
     if return_type_errors:
         print("\nAPI Method Return Type Validation Errors:\n")
         for error in return_type_errors:
-            print(error)
+            print(error, file=sys.stderr)
         sys.exit(1)
     now = str(datetime.now())
     print(
diff --git a/docs/openapi_generator/pyopenapi/generator.py b/docs/openapi_generator/pyopenapi/generator.py
index 3936bb3c4..5b7a685c1 100644
--- a/docs/openapi_generator/pyopenapi/generator.py
+++ b/docs/openapi_generator/pyopenapi/generator.py
@@ -6,6 +6,7 @@
 
 import hashlib
 import ipaddress
+import types
 import typing
 from dataclasses import make_dataclass
 from typing import Any, Dict, Set, Union
@@ -179,7 +180,7 @@ class ContentBuilder:
         "Creates the content subtree for a request or response."
 
         def is_iterator_type(t):
-            return "StreamChunk" in str(t)
+            return "StreamChunk" in str(t) or "OpenAIResponseObjectStream" in str(t)
 
         def get_media_type(t):
             if is_generic_list(t):
@@ -189,7 +190,7 @@ class ContentBuilder:
             else:
                 return "application/json"
 
-        if typing.get_origin(payload_type) is typing.Union:
+        if typing.get_origin(payload_type) in (typing.Union, types.UnionType):
             media_types = []
             item_types = []
             for x in typing.get_args(payload_type):
@@ -758,7 +759,7 @@ class Generator:
         )
 
         return Operation(
-            tags=[op.defining_class.__name__],
+            tags=[getattr(op.defining_class, "API_NAMESPACE", op.defining_class.__name__)],
             summary=None,
             # summary=doc_string.short_description,
             description=description,
@@ -804,6 +805,8 @@ class Generator:
         operation_tags: List[Tag] = []
         for cls in endpoint_classes:
             doc_string = parse_type(cls)
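+            # Classes that declare an API_NAMESPACE different from their own name are served under
+            # that namespace (see the `tags=` logic above), so skip them here to avoid emitting a
+            # duplicate, unused tag in the generated spec.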
+            if hasattr(cls, "API_NAMESPACE") and cls.API_NAMESPACE != cls.__name__:
+                continue
             operation_tags.append(
                 Tag(
                     name=cls.__name__,
diff --git a/docs/openapi_generator/pyopenapi/utility.py b/docs/openapi_generator/pyopenapi/utility.py
index db18e8430..12a69050c 100644
--- a/docs/openapi_generator/pyopenapi/utility.py
+++ b/docs/openapi_generator/pyopenapi/utility.py
@@ -174,14 +174,64 @@ def _validate_list_parameters_contain_data(method) -> str | None:
         return "does not have a mandatory data attribute containing the list of objects"
 
 
+def _validate_has_ellipsis(method) -> str | None:
+    source = inspect.getsource(method)
+    if "..." not in source and not "NotImplementedError" in source:
+        return "does not contain ellipsis (...) in its implementation"
+
+def _validate_has_return_in_docstring(method) -> str | None:
+    source = inspect.getsource(method)
+    return_type = method.__annotations__.get('return')
+    if return_type is not None and return_type != type(None) and ":returns:" not in source:
+        return "does not have a ':returns:' in its docstring"
+
+def _validate_has_params_in_docstring(method) -> str | None:
+    source = inspect.getsource(method)
+    sig = inspect.signature(method)
+    # Only check if the method has more than one parameter
+    if len(sig.parameters) > 1 and ":param" not in source:
+        return "does not have a ':param' in its docstring"
+
+def _validate_has_no_return_none_in_docstring(method) -> str | None:
+    source = inspect.getsource(method)
+    return_type = method.__annotations__.get('return')
+    if return_type is None and ":returns: None" in source:
+        return "has a ':returns: None' in its docstring which is redundant for None-returning functions"
+
+def _validate_docstring_lines_end_with_dot(method) -> str | None:
+    docstring = inspect.getdoc(method)
+    if docstring is None:
+        return None
+
+    lines = docstring.split('\n')
+    for line in lines:
+        line = line.strip()
+        if line and not any(line.endswith(char) for char in '.:{}[]()",'):
+            return f"docstring line '{line}' does not end with a valid character: . : {{ }} [ ] ( ) , \""
+
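+# Illustrative example (comment only): a protocol method that satisfies the validators above would
+# look roughly like this -- an ellipsis body, ':param' and ':returns:' entries, and docstring lines
+# ending in valid punctuation:
+#
+#   async def get_model(self, model_id: str) -> Model:
+#       """Get a model by its identifier.
+#
+#       :param model_id: The identifier of the model to get.
+#       :returns: The model matching the identifier.
+#       """
+#       ...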
 _VALIDATORS = {
     "GET": [
         _validate_api_method_return_type,
         _validate_list_parameters_contain_data,
         _validate_api_method_doesnt_return_list,
+        _validate_has_ellipsis,
+        _validate_has_return_in_docstring,
+        _validate_has_params_in_docstring,
+        _validate_docstring_lines_end_with_dot,
     ],
     "DELETE": [
         _validate_api_delete_method_returns_none,
+        _validate_has_ellipsis,
+        _validate_has_return_in_docstring,
+        _validate_has_params_in_docstring,
+        _validate_has_no_return_none_in_docstring
+    ],
+    "POST": [
+        _validate_has_ellipsis,
+        _validate_has_return_in_docstring,
+        _validate_has_params_in_docstring,
+        _validate_has_no_return_none_in_docstring,
+        _validate_docstring_lines_end_with_dot,
     ],
 }
 
diff --git a/docs/readme.md b/docs/readme.md
index b88a4738d..c238c4720 100644
--- a/docs/readme.md
+++ b/docs/readme.md
@@ -3,10 +3,10 @@
 Here's a collection of comprehensive guides, examples, and resources for building AI applications with Llama Stack. For the complete documentation, visit our [ReadTheDocs page](https://llama-stack.readthedocs.io/en/latest/index.html).
 
 ## Render locally
+
+From the llama-stack root directory, run the following command to render the docs locally:
 ```bash
-pip install -r requirements.txt
-cd docs
-python -m sphinx_autobuild source _build
+uv run --group docs sphinx-autobuild docs/source docs/build/html --write-all
 ```
 You can open up the docs in your browser at http://localhost:8000
 
diff --git a/docs/requirements.txt b/docs/requirements.txt
deleted file mode 100644
index e31d08ff1..000000000
--- a/docs/requirements.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-sphinx==8.1.3
-myst-parser
-linkify
--e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme
-sphinx-rtd-theme>=1.0.0
-sphinx_autobuild
-sphinx-copybutton
-sphinx-design
-sphinx-pdj-theme
-sphinx_rtd_dark_mode
-sphinx-tabs
-sphinxcontrib-openapi
-sphinxcontrib-redoc
-sphinxcontrib-mermaid
-sphinxcontrib-video
-tomli
diff --git a/docs/source/building_applications/rag.md b/docs/source/building_applications/rag.md
index 39d1ba333..289c38991 100644
--- a/docs/source/building_applications/rag.md
+++ b/docs/source/building_applications/rag.md
@@ -51,11 +51,37 @@ chunks = [
         "mime_type": "text/plain",
         "metadata": {
             "document_id": "doc1",
+            "author": "Jane Doe",
         },
     },
 ]
 client.vector_io.insert(vector_db_id=vector_db_id, chunks=chunks)
 ```
+
+#### Using Precomputed Embeddings
+If you decide to precompute embeddings for your documents, you can insert them directly into the vector database by
+including the embedding vectors in the chunk data. This is useful if you have a separate embedding service or if you
+want to customize the ingestion process.
+```python
+chunks_with_embeddings = [
+    {
+        "content": "First chunk of text",
+        "mime_type": "text/plain",
+        "embedding": [0.1, 0.2, 0.3, ...],  # Your precomputed embedding vector
+        "metadata": {"document_id": "doc1", "section": "introduction"},
+    },
+    {
+        "content": "Second chunk of text",
+        "mime_type": "text/plain",
+        "embedding": [0.2, 0.3, 0.4, ...],  # Your precomputed embedding vector
+        "metadata": {"document_id": "doc1", "section": "methodology"},
+    },
+]
+client.vector_io.insert(vector_db_id=vector_db_id, chunks=chunks_with_embeddings)
+```
+When providing precomputed embeddings, ensure the embedding dimension matches the embedding_dimension specified when
+registering the vector database.
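+
+For example, if the vector database was registered with a 384-dimensional embedding model (the model
+name and dimension below are illustrative placeholders), each precomputed vector must have exactly
+384 elements:
+```python
+client.vector_dbs.register(
+    vector_db_id=vector_db_id,
+    embedding_model="all-MiniLM-L6-v2",  # model used to embed queries at retrieval time
+    embedding_dimension=384,  # precomputed chunk embeddings must match this dimension
+)
+```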
+
 ### Retrieval
 You can query the vector database to retrieve documents based on their embeddings.
 ```python
@@ -68,7 +94,8 @@ chunks_response = client.vector_io.query(
 ### Using the RAG Tool
 
 A better way to ingest documents is to use the RAG Tool. This tool allows you to ingest documents from URLs, files, etc.
-and automatically chunks them into smaller pieces.
+and automatically chunks them into smaller pieces. More examples for how to format a RAGDocument can be found in the
+[appendix](#more-ragdocument-examples).
 
 ```python
 from llama_stack_client import RAGDocument
@@ -97,6 +124,17 @@ results = client.tool_runtime.rag_tool.query(
 )
 ```
 
+You can configure how the RAG tool adds metadata to the context if you find it useful for your application. Simply add:
+```python
+# Query documents
+results = client.tool_runtime.rag_tool.query(
+    vector_db_ids=[vector_db_id],
+    content="What do you know about...",
+    query_config={
+        "chunk_template": "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n",
+    },
+)
+```
 ### Building RAG-Enhanced Agents
 
 One of the most powerful patterns is combining agents with RAG capabilities. Here's a complete example:
@@ -114,6 +152,12 @@ agent = Agent(
             "name": "builtin::rag/knowledge_search",
             "args": {
                 "vector_db_ids": [vector_db_id],
+                # Defaults
+                "query_config": {
+                    "chunk_size_in_tokens": 512,
+                    "chunk_overlap_in_tokens": 0,
+                    "chunk_template": "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n",
+                },
             },
         }
     ],
@@ -178,3 +222,38 @@ for vector_db_id in client.vector_dbs.list():
     print(f"Unregistering vector database: {vector_db_id.identifier}")
     client.vector_dbs.unregister(vector_db_id=vector_db_id.identifier)
 ```
+
+### Appendix
+
+#### More RAGDocument Examples
+```python
+from llama_stack_client import RAGDocument
+import base64
+import requests
+
+RAGDocument(document_id="num-0", content={"uri": "file://path/to/file"})
+RAGDocument(document_id="num-1", content="plain text")
+RAGDocument(
+    document_id="num-2",
+    content={
+        "type": "text",
+        "text": "plain text input",
+    },  # for inputs that should be treated as text explicitly
+)
+RAGDocument(
+    document_id="num-3",
+    content={
+        "type": "image",
+        "image": {"url": {"uri": "https://mywebsite.com/image.jpg"}},
+    },
+)
+B64_ENCODED_IMAGE = base64.b64encode(
+    requests.get(
+        "https://raw.githubusercontent.com/meta-llama/llama-stack/refs/heads/main/docs/_static/llama-stack.png"
+    ).content
+)
+RAGDocument(
+    document_id="num-4",
+    content={"type": "image", "image": {"data": B64_ENCODED_IMAGE}},
+)
+```
+For more strongly typed interactions, use the typed dicts found [here](https://github.com/meta-llama/llama-stack-client-python/blob/38cd91c9e396f2be0bec1ee96a19771582ba6f17/src/llama_stack_client/types/shared_params/document.py).
diff --git a/docs/source/building_applications/tools.md b/docs/source/building_applications/tools.md
index 94841a773..c7af17bfa 100644
--- a/docs/source/building_applications/tools.md
+++ b/docs/source/building_applications/tools.md
@@ -41,30 +41,9 @@ client.toolgroups.register(
 
 The tool requires an API key which can be provided either in the configuration or through the request header `X-LlamaStack-Provider-Data`. The format of the header is `{"_api_key": }`.
 
+> **NOTE:** When using Tavily Search or Bing Search, the inference output will still display "Brave Search." This is because Llama models have been trained with Brave Search as a built-in tool; Tavily and Bing are simply used in place of Brave Search.
 
 
-#### Code Interpreter
-
-The Code Interpreter allows execution of Python code within a controlled environment.
-
-```python
-# Register Code Interpreter tool group
-client.toolgroups.register(
-    toolgroup_id="builtin::code_interpreter", provider_id="code_interpreter"
-)
-```
-
-Features:
-- Secure execution environment using `bwrap` sandboxing
-- Matplotlib support for generating plots
-- Disabled dangerous system operations
-- Configurable execution timeouts
-
-> ⚠️ Important: The code interpreter tool can operate in a controlled environment locally or on Podman containers. To ensure proper functionality in containerized environments:
-> - The container requires privileged access (e.g., --privileged).
-> - Users without sufficient permissions may encounter permission errors. (`bwrap: Can't mount devpts on /newroot/dev/pts: Permission denied`)
-> - 🔒 Security Warning: Privileged mode grants elevated access and bypasses security restrictions. Use only in local, isolated, or controlled environments.
-
 #### WolframAlpha
 
 The WolframAlpha tool provides access to computational knowledge through the WolframAlpha API.
@@ -102,7 +81,7 @@ Features:
 - Context retrieval with token limits
 
 
-> **Note:** By default, llama stack run.yaml defines toolgroups for web search, code interpreter and rag, that are provided by tavily-search, code-interpreter and rag providers.
+> **Note:** By default, llama stack run.yaml defines toolgroups for web search, wolfram alpha and rag, that are provided by tavily-search, wolfram-alpha and rag providers.
 
 ## Model Context Protocol (MCP) Tools
 
@@ -186,31 +165,69 @@ all_tools = client.tools.list_tools()
 group_tools = client.tools.list_tools(toolgroup_id="search_tools")
 ```
 
-## Simple Example: Using an Agent with the Code-Interpreter Tool
-
+## Simple Example 2: Using an Agent with the Web Search Tool
+1. Start by registering a Tavily API key at [Tavily](https://tavily.com/).
+2. [Optional] Provide the API key directly to the Llama Stack server
+```bash
+export TAVILY_SEARCH_API_KEY="your key"
+```
+```bash
+--env TAVILY_SEARCH_API_KEY=${TAVILY_SEARCH_API_KEY}
+```
+3. Run the following script.
 ```python
-from llama_stack_client import Agent
+from llama_stack_client.lib.agents.agent import Agent
+from llama_stack_client.types.agent_create_params import AgentConfig
+from llama_stack_client.lib.agents.event_logger import EventLogger
+from llama_stack_client import LlamaStackClient
+
+client = LlamaStackClient(
+    base_url=f"http://localhost:8321",
+    provider_data={
+        "tavily_search_api_key": "your_TAVILY_SEARCH_API_KEY"
+    },  # Set this from the client side. No need to provide it if it has already been configured on the Llama Stack server.
+)
 
-# Instantiate the AI agent with the given configuration
 agent = Agent(
     client,
-    name="code-interpreter",
-    description="A code interpreter agent for executing Python code snippets",
-    instructions="""
-    You are a highly reliable, concise, and precise assistant.
-    Always show the generated code, never generate your own code, and never anticipate results.
-    """,
     model="meta-llama/Llama-3.2-3B-Instruct",
-    tools=["builtin::code_interpreter"],
-    max_infer_iters=5,
+    instructions=(
+        "You are a web search assistant, must use websearch tool to look up the most current and precise information available. "
+    ),
+    tools=["builtin::websearch"],
 )
 
-# Start a session
-session_id = agent.create_session("tool_session")
+session_id = agent.create_session("websearch-session")
 
-# Send a query to the AI agent for code execution
 response = agent.create_turn(
-    messages=[{"role": "user", "content": "Run this code: print(3 ** 4 - 5 * 2)"}],
+    messages=[
+        {"role": "user", "content": "How did the USA perform in the last Olympics?"}
+    ],
     session_id=session_id,
 )
+for log in EventLogger().log(response):
+    log.print()
+```
+
+## Simple Example 3: Using an Agent with the WolframAlpha Tool
+1. Start by registering for a WolframAlpha API key at [WolframAlpha Developer Portal](https://developer.wolframalpha.com/access).
+2. Provide the API key either when starting the Llama Stack server:
+    ```bash
+    --env WOLFRAM_ALPHA_API_KEY=${WOLFRAM_ALPHA_API_KEY}
+    ```
+    or from the client side:
+    ```python
+    client = LlamaStackClient(
+        base_url="http://localhost:8321",
+        provider_data={"wolfram_alpha_api_key": wolfram_api_key},
+    )
+    ```
+3. Configure the tools in the Agent by setting `tools=["builtin::wolfram_alpha"]`.
+4. Example user query:
+    ```python
+    response = agent.create_turn(
+        messages=[{"role": "user", "content": "Solve x^2 + 2x + 1 = 0 using WolframAlpha"}],
+        session_id=session_id,
+    )
+    ```
 ```
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 55c6383b2..6e59dbdfb 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -22,7 +22,11 @@ from docutils import nodes
 # Read version from pyproject.toml
 with Path(__file__).parent.parent.parent.joinpath("pyproject.toml").open("rb") as f:
     pypi_url = "https://pypi.org/pypi/llama-stack/json"
-    version_tag = json.loads(requests.get(pypi_url).text)["info"]["version"]
+    headers = {
+        'User-Agent': 'pip/23.0.1 (python 3.11)',  # Mimic pip's user agent
+        'Accept': 'application/json'
+    }
+    version_tag = json.loads(requests.get(pypi_url, headers=headers).text)["info"]["version"]
     print(f"{version_tag=}")
 
     # generate the full link including text and url here
@@ -53,14 +57,6 @@ myst_enable_extensions = ["colon_fence"]
 
 html_theme = "sphinx_rtd_theme"
 html_use_relative_paths = True
-
-# html_theme = "sphinx_pdj_theme"
-# html_theme_path = [sphinx_pdj_theme.get_html_theme_path()]
-
-# html_theme = "pytorch_sphinx_theme"
-# html_theme_path = [pytorch_sphinx_theme.get_html_theme_path()]
-
-
 templates_path = ["_templates"]
 exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
 
@@ -110,6 +106,8 @@ html_theme_options = {
     "canonical_url": "https://github.com/meta-llama/llama-stack",
     "collapse_navigation": False,
     # "style_nav_header_background": "#c3c9d4",
+    'display_version': True,
+    'version_selector': True,
 }
 
 default_dark_mode = False
diff --git a/docs/source/contributing/new_api_provider.md b/docs/source/contributing/new_api_provider.md
index c412a350b..83058896a 100644
--- a/docs/source/contributing/new_api_provider.md
+++ b/docs/source/contributing/new_api_provider.md
@@ -6,7 +6,7 @@ This guide will walk you through the process of adding a new API provider to Lla
 - Begin by reviewing the [core concepts](../concepts/index.md) of Llama Stack and choose the API your provider belongs to (Inference, Safety, VectorIO, etc.)
 - Determine the provider type ({repopath}`Remote::llama_stack/providers/remote` or {repopath}`Inline::llama_stack/providers/inline`). Remote providers make requests to external services, while inline providers execute implementation locally.
 - Add your provider to the appropriate {repopath}`Registry::llama_stack/providers/registry/`. Specify pip dependencies necessary.
-- Update any distribution {repopath}`Templates::llama_stack/templates/` build.yaml and run.yaml files if they should include your provider by default. Run {repopath}`./scripts/distro_codegen.py` if necessary. Note that `distro_codegen.py` will fail if the new provider causes any distribution template to attempt to import provider-specific dependencies. This usually means the distribution's `get_distribution_template()` code path should only import any necessary Config or model alias definitions from each provider and not the provider's actual implementation.
+- Update any distribution {repopath}`Templates::llama_stack/templates/` `build.yaml` and `run.yaml` files if they should include your provider by default. Run {repopath}`./scripts/distro_codegen.py` if necessary. Note that `distro_codegen.py` will fail if the new provider causes any distribution template to attempt to import provider-specific dependencies. This usually means the distribution's `get_distribution_template()` code path should only import any necessary Config or model alias definitions from each provider and not the provider's actual implementation.
 
 
 Here are some example PRs to help you get started:
@@ -33,6 +33,7 @@ Note that each provider's `sample_run_config()` method (in the configuration cla
 
 Unit tests are located in {repopath}`tests/unit`. Provider-specific unit tests are located in {repopath}`tests/unit/providers`. These tests are all run automatically as part of the CI process.
 
+Consult {repopath}`tests/unit/README.md` for more details on how to run the tests manually.
 
 ### 3. Additional end-to-end testing
 
diff --git a/docs/source/distributions/building_distro.md b/docs/source/distributions/building_distro.md
index ad5d3bff4..0dbabf8aa 100644
--- a/docs/source/distributions/building_distro.md
+++ b/docs/source/distributions/building_distro.md
@@ -109,8 +109,6 @@ llama stack build --list-templates
 +------------------------------+-----------------------------------------------------------------------------+
 | nvidia                       | Use NVIDIA NIM for running LLM inference                                    |
 +------------------------------+-----------------------------------------------------------------------------+
-| meta-reference-quantized-gpu | Use Meta Reference with fp8, int4 quantization for running LLM inference    |
-+------------------------------+-----------------------------------------------------------------------------+
 | cerebras                     | Use Cerebras for running LLM inference                                      |
 +------------------------------+-----------------------------------------------------------------------------+
 | ollama                       | Use (an external) Ollama server for running LLM inference                   |
@@ -176,7 +174,11 @@ distribution_spec:
     safety: inline::llama-guard
     agents: inline::meta-reference
     telemetry: inline::meta-reference
+image_name: ollama
 image_type: conda
+
+# If some providers are external, you can specify the path to the implementation
+external_providers_dir: ~/.llama/providers.d
 ```
 
 ```
@@ -184,6 +186,57 @@ llama stack build --config llama_stack/templates/ollama/build.yaml
 ```
 :::
 
+:::{tab-item} Building with External Providers
+
+Llama Stack supports external providers that live outside of the main codebase. This allows you to create and maintain your own providers independently or use community-provided providers.
+
+To build a distribution with external providers, you need to:
+
+1. Configure the `external_providers_dir` in your build configuration file:
+
+```yaml
+# Example my-external-stack.yaml with external providers
+version: '2'
+distribution_spec:
+  description: Custom distro for CI tests
+  providers:
+    inference:
+    - remote::custom_ollama
+# Add more providers as needed
+image_type: container
+image_name: ci-test
+# Path to external provider implementations
+external_providers_dir: ~/.llama/providers.d
+```
+
+Here's an example for a custom Ollama provider:
+
+```yaml
+adapter:
+  adapter_type: custom_ollama
+  pip_packages:
+  - ollama
+  - aiohttp
+  - llama-stack-provider-ollama # This is the provider package
+  config_class: llama_stack_ollama_provider.config.OllamaImplConfig
+  module: llama_stack_ollama_provider
+api_dependencies: []
+optional_api_dependencies: []
+```
+
+The `pip_packages` section lists the Python packages required by the provider, as well as the
+provider package itself. The provider package must be available on PyPI, or it can be installed
+from a local directory or a git repository (git must be installed in the build environment).
+
+2. Build your distribution using the config file:
+
+```
+llama stack build --config my-external-stack.yaml
+```
+
+For more information on external providers, including directory structure, provider types, and implementation requirements, see the [External Providers documentation](../providers/external.md).
+:::
+
 :::{tab-item} Building Container
 
 ```{admonition} Podman Alternative
@@ -218,7 +271,7 @@ Now, let's start the Llama Stack Distribution Server. You will need the YAML con
 
 ```
 llama stack run -h
-usage: llama stack run [-h] [--port PORT] [--image-name IMAGE_NAME] [--disable-ipv6] [--env KEY=VALUE] [--tls-keyfile TLS_KEYFILE] [--tls-certfile TLS_CERTFILE]
+usage: llama stack run [-h] [--port PORT] [--image-name IMAGE_NAME] [--env KEY=VALUE] [--tls-keyfile TLS_KEYFILE] [--tls-certfile TLS_CERTFILE]
                        [--image-type {conda,container,venv}]
                        config
 
@@ -232,7 +285,6 @@ options:
   --port PORT           Port to run the server on. It can also be passed via the env var LLAMA_STACK_PORT. (default: 8321)
   --image-name IMAGE_NAME
                         Name of the image to run. Defaults to the current environment (default: None)
-  --disable-ipv6        Disable IPv6 support (default: False)
   --env KEY=VALUE       Environment variables to pass to the server in KEY=VALUE format. Can be specified multiple times. (default: [])
   --tls-keyfile TLS_KEYFILE
                         Path to TLS key file for HTTPS (default: None)
@@ -286,6 +338,48 @@ INFO:     Application startup complete.
 INFO:     Uvicorn running on http://['::', '0.0.0.0']:8321 (Press CTRL+C to quit)
 INFO:     2401:db00:35c:2d2b:face:0:c9:0:54678 - "GET /models/list HTTP/1.1" 200 OK
 ```
+### Listing Distributions
+Using the list command, you can view all existing Llama Stack distributions, including stacks built from templates, from scratch, or using custom configuration files.
+
+```
+llama stack list -h
+usage: llama stack list [-h]
+
+list the build stacks
+
+options:
+  -h, --help  show this help message and exit
+```
+
+Example Usage
+
+```
+llama stack list
+```
+
+### Removing a Distribution
+Use the remove command to delete a distribution you've previously built.
+
+```
+llama stack rm -h
+usage: llama stack rm [-h] [--all] [name]
+
+Remove the build stack
+
+positional arguments:
+  name        Name of the stack to delete (default: None)
+
+options:
+  -h, --help  show this help message and exit
+  --all, -a   Delete all stacks (use with caution) (default: False)
+```
+
+Example
+```
+llama stack rm llamastack-test
+```
+
+To keep your environment organized and avoid clutter, consider using `llama stack list` to review old or unused distributions and `llama stack rm <name>` to delete them when they’re no longer needed.
 
 ### Troubleshooting
 
diff --git a/docs/source/distributions/configuration.md b/docs/source/distributions/configuration.md
index c06632991..de99b6576 100644
--- a/docs/source/distributions/configuration.md
+++ b/docs/source/distributions/configuration.md
@@ -53,6 +53,13 @@ models:
   provider_id: ollama
   provider_model_id: null
 shields: []
+server:
+  port: 8321
+  auth:
+    provider_type: "kubernetes"
+    config:
+      api_server_url: "https://kubernetes.default.svc"
+      ca_cert_path: "/path/to/ca.crt"
 ```
 
 Let's break this down into the different sections. The first section specifies the set of APIs that the stack server will serve:
@@ -102,6 +109,227 @@ A Model is an instance of a "Resource" (see [Concepts](../concepts/index)) and i
 
 What's with the `provider_model_id` field? This is an identifier for the model inside the provider's model catalog. Contrast it with `model_id` which is the identifier for the same model for Llama Stack's purposes. For example, you may want to name "llama3.2:vision-11b" as "image_captioning_model" when you use it in your Stack interactions. When omitted, the server will set `provider_model_id` to be the same as `model_id`.
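+
+For example (illustrative values), the "image_captioning_model" alias described above would appear in
+`run.yaml` as:
+
+```yaml
+models:
+- model_id: image_captioning_model        # the name you use in Stack interactions
+  provider_id: ollama
+  provider_model_id: llama3.2:vision-11b  # the name in the provider's model catalog
+```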
 
+## Server Configuration
+
+The `server` section configures the HTTP server that serves the Llama Stack APIs:
+
+```yaml
+server:
+  port: 8321  # Port to listen on (default: 8321)
+  tls_certfile: "/path/to/cert.pem"  # Optional: Path to TLS certificate for HTTPS
+  tls_keyfile: "/path/to/key.pem"    # Optional: Path to TLS key for HTTPS
+```
+
+### Authentication Configuration
+
+The `auth` section configures authentication for the server. When configured, all API requests must include a valid Bearer token in the Authorization header:
+
+```
+Authorization: Bearer <token>
+```
+
+The server supports multiple authentication providers:
+
+#### OAuth 2.0/OpenID Connect Provider with Kubernetes
+
+The Kubernetes cluster must be configured to use a service account for authentication.
+
+```bash
+kubectl create namespace llama-stack
+kubectl create serviceaccount llama-stack-auth -n llama-stack
+kubectl create rolebinding llama-stack-auth-rolebinding --clusterrole=admin --serviceaccount=llama-stack:llama-stack-auth -n llama-stack
+kubectl create token llama-stack-auth -n llama-stack > llama-stack-auth-token
+```
+
+Make sure the `kube-apiserver` runs with `--anonymous-auth=true` to allow unauthenticated requests
+and that the correct RoleBinding is created to allow the service account to access the necessary
+resources. If that is not the case, you can create a RoleBinding for the service account to access
+the necessary resources:
+
+```yaml
+# allow-anonymous-openid.yaml
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: allow-anonymous-openid
+rules:
+- nonResourceURLs: ["/openid/v1/jwks"]
+  verbs: ["get"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: allow-anonymous-openid
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: allow-anonymous-openid
+subjects:
+- kind: User
+  name: system:anonymous
+  apiGroup: rbac.authorization.k8s.io
+```
+
+And then apply the configuration:
+```bash
+kubectl apply -f allow-anonymous-openid.yaml
+```
+
+Validates tokens against the Kubernetes API server through the OIDC provider:
+```yaml
+server:
+  auth:
+    provider_type: "oauth2_token"
+    config:
+      jwks:
+        uri: "https://kubernetes.default.svc"
+        key_recheck_period: 3600
+      tls_cafile: "/path/to/ca.crt"
+      issuer: "https://kubernetes.default.svc"
+      audience: "https://kubernetes.default.svc"
+```
+
+To find your cluster's audience, run:
+```bash
+kubectl create token default --duration=1h | cut -d. -f2 | base64 -d | jq .aud
+```
+
+For the issuer, you can use the OIDC provider's URL:
+```bash
+kubectl get --raw /.well-known/openid-configuration| jq .issuer
+```
+
+For the tls_cafile, you can use the CA certificate of the OIDC provider:
+```bash
+kubectl config view --minify -o jsonpath='{.clusters[0].cluster.certificate-authority}'
+```
+
+The provider extracts user information from the JWT token:
+- Username from the `sub` claim becomes a role
+- Kubernetes groups become teams
+
+You can easily validate a request by running:
+
+```bash
+curl -s -L -H "Authorization: Bearer $(cat llama-stack-auth-token)" http://127.0.0.1:8321/v1/providers
+```
+
+#### Custom Provider
+Validates tokens against a custom authentication endpoint:
+```yaml
+server:
+  auth:
+    provider_type: "custom"
+    config:
+      endpoint: "https://auth.example.com/validate"  # URL of the auth endpoint
+```
+
+The custom endpoint receives a POST request with:
+```json
+{
+  "api_key": "",
+  "request": {
+    "path": "/api/v1/endpoint",
+    "headers": {
+      "content-type": "application/json",
+      "user-agent": "curl/7.64.1"
+    },
+    "params": {
+      "key": ["value"]
+    }
+  }
+}
+```
+
+And must respond with:
+```json
+{
+  "access_attributes": {
+    "roles": ["admin", "user"],
+    "teams": ["ml-team", "nlp-team"],
+    "projects": ["llama-3", "project-x"],
+    "namespaces": ["research"]
+  },
+  "message": "Authentication successful"
+}
+```
+
+If no access attributes are returned, the token is used as a namespace.
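+
+A minimal sketch of such an endpoint is shown below (it assumes FastAPI purely for illustration; any
+framework that accepts the request payload above and returns the expected JSON shape will work):
+
+```python
+from fastapi import FastAPI, HTTPException
+
+app = FastAPI()
+
+
+@app.post("/validate")
+async def validate(payload: dict):
+    # `payload` matches the request shape above: {"api_key": ..., "request": {...}}
+    if payload.get("api_key") != "expected-token":  # replace with a real token lookup
+        raise HTTPException(status_code=401, detail="Invalid token")
+    return {
+        "access_attributes": {"roles": ["user"], "teams": ["ml-team"]},
+        "message": "Authentication successful",
+    }
+```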
+
+### Quota Configuration
+
+The `quota` section allows you to enable server-side request throttling for both
+authenticated and anonymous clients. This is useful for preventing abuse, enforcing
+fairness across tenants, and controlling infrastructure costs without requiring
+client-side rate limiting or external proxies.
+
+Quotas are disabled by default. When enabled, each client is tracked using either:
+
+* Their authenticated `client_id` (derived from the Bearer token), or
+* Their IP address (fallback for anonymous requests)
+
+Quota state is stored in a SQLite-backed key-value store, and rate limits are applied
+within a configurable time window (currently only `day` is supported).
+
+#### Example
+
+```yaml
+server:
+  quota:
+    kvstore:
+      type: sqlite
+      db_path: ./quotas.db
+    anonymous_max_requests: 100
+    authenticated_max_requests: 1000
+    period: day
+```
+
+#### Configuration Options
+
+| Field                        | Description                                                                |
+| ---------------------------- | -------------------------------------------------------------------------- |
+| `kvstore`                    | Required. Backend storage config for tracking request counts.              |
+| `kvstore.type`               | Must be `"sqlite"` for now. Other backends may be supported in the future. |
+| `kvstore.db_path`            | File path to the SQLite database.                                          |
+| `anonymous_max_requests`     | Max requests per period for unauthenticated clients.                       |
+| `authenticated_max_requests` | Max requests per period for authenticated clients.                         |
+| `period`                     | Time window for quota enforcement. Only `"day"` is supported.              |
+
+> Note: if `authenticated_max_requests` is set but no authentication provider is
+configured, the server will fall back to applying `anonymous_max_requests` to all
+clients.
+
+#### Example with Authentication Enabled
+
+```yaml
+server:
+  port: 8321
+  auth:
+    provider_type: custom
+    config:
+      endpoint: https://auth.example.com/validate
+  quota:
+    kvstore:
+      type: sqlite
+      db_path: ./quotas.db
+    anonymous_max_requests: 100
+    authenticated_max_requests: 1000
+    period: day
+```
+
+If a client exceeds their limit, the server responds with:
+
+```http
+HTTP/1.1 429 Too Many Requests
+Content-Type: application/json
+
+{
+  "error": {
+    "message": "Quota exceeded"
+  }
+}
+```
+
 ## Extending to handle Safety
 
 Configuring Safety can be a little involved so it is instructive to go through an example.
diff --git a/docs/source/distributions/kubernetes_deployment.md b/docs/source/distributions/kubernetes_deployment.md
index 21ec02012..f43039824 100644
--- a/docs/source/distributions/kubernetes_deployment.md
+++ b/docs/source/distributions/kubernetes_deployment.md
@@ -172,7 +172,7 @@ spec:
       - name: llama-stack
         image: localhost/llama-stack-run-k8s:latest
         imagePullPolicy: IfNotPresent
-        command: ["python", "-m", "llama_stack.distribution.server.server", "--yaml-config", "/app/config.yaml"]
+        command: ["python", "-m", "llama_stack.distribution.server.server", "--config", "/app/config.yaml"]
         ports:
           - containerPort: 5000
         volumeMounts:
diff --git a/docs/source/distributions/ondevice_distro/android_sdk.md b/docs/source/distributions/ondevice_distro/android_sdk.md
index 4fa6eaf70..a097a2adf 100644
--- a/docs/source/distributions/ondevice_distro/android_sdk.md
+++ b/docs/source/distributions/ondevice_distro/android_sdk.md
@@ -24,7 +24,7 @@ The key files in the app are `ExampleLlamaStackLocalInference.kt`, `ExampleLlama
 Add the following dependency in your `build.gradle.kts` file:
 ```
 dependencies {
- implementation("com.llama.llamastack:llama-stack-client-kotlin:0.1.4.2")
+ implementation("com.llama.llamastack:llama-stack-client-kotlin:0.2.2")
 }
 ```
 This will download jar files in your gradle cache in a directory like `~/.gradle/caches/modules-2/files-2.1/com.llama.llamastack/`
@@ -37,11 +37,7 @@ For local inferencing, it is required to include the ExecuTorch library into you
 
 Include the ExecuTorch library by:
 1. Download the `download-prebuilt-et-lib.sh` script file from the [llama-stack-client-kotlin-client-local](https://github.com/meta-llama/llama-stack-client-kotlin/tree/latest-release/llama-stack-client-kotlin-client-local/download-prebuilt-et-lib.sh) directory to your local machine.
-2. Move the script to the top level of your Android app where the app directory resides:
-

- -

- +2. Move the script to the top level of your Android app where the `app` directory resides. 3. Run `sh download-prebuilt-et-lib.sh` to create an `app/libs` directory and download the `executorch.aar` in that path. This generates an ExecuTorch library for the XNNPACK delegate. 4. Add the `executorch.aar` dependency in your `build.gradle.kts` file: ``` @@ -52,6 +48,8 @@ dependencies { } ``` +See other dependencies for the local RAG in Android app [README](https://github.com/meta-llama/llama-stack-client-kotlin/tree/latest-release/examples/android_app#quick-start). + ## Llama Stack APIs in Your Android App Breaking down the demo app, this section will show the core pieces that are used to initialize and run inference with Llama Stack using the Kotlin library. @@ -60,7 +58,7 @@ Start a Llama Stack server on localhost. Here is an example of how you can do th ``` conda create -n stack-fireworks python=3.10 conda activate stack-fireworks -pip install --no-cache llama-stack==0.1.4 +pip install --no-cache llama-stack==0.2.2 llama stack build --template fireworks --image-type conda export FIREWORKS_API_KEY= llama stack run fireworks --port 5050 diff --git a/docs/source/distributions/remote_hosted_distro/watsonx.md b/docs/source/distributions/remote_hosted_distro/watsonx.md new file mode 100644 index 000000000..ec1b98059 --- /dev/null +++ b/docs/source/distributions/remote_hosted_distro/watsonx.md @@ -0,0 +1,88 @@ +--- +orphan: true +--- + +# watsonx Distribution + +```{toctree} +:maxdepth: 2 +:hidden: + +self +``` + +The `llamastack/distribution-watsonx` distribution consists of the following provider configurations. + +| API | Provider(s) | +|-----|-------------| +| agents | `inline::meta-reference` | +| datasetio | `remote::huggingface`, `inline::localfs` | +| eval | `inline::meta-reference` | +| inference | `remote::watsonx`, `inline::sentence-transformers` | +| safety | `inline::llama-guard` | +| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | +| telemetry | `inline::meta-reference` | +| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::rag-runtime`, `remote::model-context-protocol` | +| vector_io | `inline::faiss` | + + + +### Environment Variables + +The following environment variables can be configured: + +- `LLAMASTACK_PORT`: Port for the Llama Stack distribution server (default: `5001`) +- `WATSONX_API_KEY`: watsonx API Key (default: ``) +- `WATSONX_PROJECT_ID`: watsonx Project ID (default: ``) + +### Models + +The following models are available by default: + +- `meta-llama/llama-3-3-70b-instruct (aliases: meta-llama/Llama-3.3-70B-Instruct)` +- `meta-llama/llama-2-13b-chat (aliases: meta-llama/Llama-2-13b)` +- `meta-llama/llama-3-1-70b-instruct (aliases: meta-llama/Llama-3.1-70B-Instruct)` +- `meta-llama/llama-3-1-8b-instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)` +- `meta-llama/llama-3-2-11b-vision-instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)` +- `meta-llama/llama-3-2-1b-instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)` +- `meta-llama/llama-3-2-3b-instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)` +- `meta-llama/llama-3-2-90b-vision-instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)` +- `meta-llama/llama-guard-3-11b-vision (aliases: meta-llama/Llama-Guard-3-11B-Vision)` + + +### Prerequisite: API Keys + +Make sure you have access to a watsonx API Key. You can get one by referring [watsonx.ai](https://www.ibm.com/docs/en/masv-and-l/maximo-manage/continuous-delivery?topic=setup-create-watsonx-api-key). 
+ + +## Running Llama Stack with watsonx + +You can do this via Conda (build code), venv or Docker which has a pre-built image. + +### Via Docker + +This method allows you to get started quickly without having to build the distribution code. + +```bash +LLAMA_STACK_PORT=5001 +docker run \ + -it \ + -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ + -v ./run.yaml:/root/my-run.yaml \ + llamastack/distribution-watsonx \ + --config /root/my-run.yaml \ + --port $LLAMA_STACK_PORT \ + --env WATSONX_API_KEY=$WATSONX_API_KEY \ + --env WATSONX_PROJECT_ID=$WATSONX_PROJECT_ID \ + --env WATSONX_BASE_URL=$WATSONX_BASE_URL +``` + +### Via Conda + +```bash +llama stack build --template watsonx --image-type conda +llama stack run ./run.yaml \ + --port $LLAMA_STACK_PORT \ + --env WATSONX_API_KEY=$WATSONX_API_KEY \ + --env WATSONX_PROJECT_ID=$WATSONX_PROJECT_ID +``` diff --git a/docs/source/distributions/self_hosted_distro/bedrock.md b/docs/source/distributions/self_hosted_distro/bedrock.md index 302d6932b..d7aedbfb2 100644 --- a/docs/source/distributions/self_hosted_distro/bedrock.md +++ b/docs/source/distributions/self_hosted_distro/bedrock.md @@ -19,7 +19,7 @@ The `llamastack/distribution-bedrock` distribution consists of the following pro | safety | `remote::bedrock` | | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | | telemetry | `inline::meta-reference` | -| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime`, `remote::model-context-protocol` | +| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::rag-runtime`, `remote::model-context-protocol` | | vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` | diff --git a/docs/source/distributions/self_hosted_distro/cerebras.md b/docs/source/distributions/self_hosted_distro/cerebras.md index 8f441823a..3c4db1b75 100644 --- a/docs/source/distributions/self_hosted_distro/cerebras.md +++ b/docs/source/distributions/self_hosted_distro/cerebras.md @@ -12,7 +12,7 @@ The `llamastack/distribution-cerebras` distribution consists of the following pr | safety | `inline::llama-guard` | | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | | telemetry | `inline::meta-reference` | -| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime` | +| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::rag-runtime` | | vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` | @@ -52,7 +52,7 @@ docker run \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ./run.yaml:/root/my-run.yaml \ llamastack/distribution-cerebras \ - --yaml-config /root/my-run.yaml \ + --config /root/my-run.yaml \ --port $LLAMA_STACK_PORT \ --env CEREBRAS_API_KEY=$CEREBRAS_API_KEY ``` diff --git a/docs/source/distributions/self_hosted_distro/dell.md b/docs/source/distributions/self_hosted_distro/dell.md index 96b0ef478..eded3bdc4 100644 --- a/docs/source/distributions/self_hosted_distro/dell.md +++ b/docs/source/distributions/self_hosted_distro/dell.md @@ -23,7 +23,7 @@ The `llamastack/distribution-dell` distribution consists of the following provid | safety | `inline::llama-guard` | | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | | telemetry | `inline::meta-reference` | -| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime` | +| tool_runtime | `remote::brave-search`, `remote::tavily-search`, 
`inline::rag-runtime` | | vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` | @@ -155,7 +155,7 @@ docker run \ -v $HOME/.llama:/root/.llama \ -v ./llama_stack/templates/tgi/run-with-safety.yaml:/root/my-run.yaml \ llamastack/distribution-dell \ - --yaml-config /root/my-run.yaml \ + --config /root/my-run.yaml \ --port $LLAMA_STACK_PORT \ --env INFERENCE_MODEL=$INFERENCE_MODEL \ --env DEH_URL=$DEH_URL \ diff --git a/docs/source/distributions/self_hosted_distro/fireworks.md b/docs/source/distributions/self_hosted_distro/fireworks.md index ee9ddc818..d36e94748 100644 --- a/docs/source/distributions/self_hosted_distro/fireworks.md +++ b/docs/source/distributions/self_hosted_distro/fireworks.md @@ -22,7 +22,7 @@ The `llamastack/distribution-fireworks` distribution consists of the following p | safety | `inline::llama-guard` | | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | | telemetry | `inline::meta-reference` | -| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `remote::wolfram-alpha`, `inline::code-interpreter`, `inline::rag-runtime`, `remote::model-context-protocol` | +| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `remote::wolfram-alpha`, `inline::rag-runtime`, `remote::model-context-protocol` | | vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` | diff --git a/docs/source/distributions/self_hosted_distro/groq.md b/docs/source/distributions/self_hosted_distro/groq.md index 4f5a8a859..1b2194ad8 100644 --- a/docs/source/distributions/self_hosted_distro/groq.md +++ b/docs/source/distributions/self_hosted_distro/groq.md @@ -22,7 +22,7 @@ The `llamastack/distribution-groq` distribution consists of the following provid | safety | `inline::llama-guard` | | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | | telemetry | `inline::meta-reference` | -| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime` | +| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::rag-runtime` | | vector_io | `inline::faiss` | @@ -43,7 +43,9 @@ The following models are available by default: - `groq/llama-3.3-70b-versatile (aliases: meta-llama/Llama-3.3-70B-Instruct)` - `groq/llama-3.2-3b-preview (aliases: meta-llama/Llama-3.2-3B-Instruct)` - `groq/llama-4-scout-17b-16e-instruct (aliases: meta-llama/Llama-4-Scout-17B-16E-Instruct)` +- `groq/meta-llama/llama-4-scout-17b-16e-instruct (aliases: meta-llama/Llama-4-Scout-17B-16E-Instruct)` - `groq/llama-4-maverick-17b-128e-instruct (aliases: meta-llama/Llama-4-Maverick-17B-128E-Instruct)` +- `groq/meta-llama/llama-4-maverick-17b-128e-instruct (aliases: meta-llama/Llama-4-Maverick-17B-128E-Instruct)` ### Prerequisite: API Keys diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md index b90f75347..8b9dcec55 100644 --- a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md +++ b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md @@ -22,7 +22,7 @@ The `llamastack/distribution-meta-reference-gpu` distribution consists of the fo | safety | `inline::llama-guard` | | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | | telemetry | `inline::meta-reference` | -| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime`, `remote::model-context-protocol` | +| tool_runtime | 
`remote::brave-search`, `remote::tavily-search`, `inline::rag-runtime`, `remote::model-context-protocol` |
 | vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
@@ -81,6 +81,7 @@ LLAMA_STACK_PORT=8321
 docker run \
   -it \
   --pull always \
+  --gpus all \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
   llamastack/distribution-meta-reference-gpu \
@@ -94,6 +95,7 @@ If you are using Llama Stack Safety / Shield APIs, use:
 docker run \
   -it \
   --pull always \
+  --gpus all \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
   llamastack/distribution-meta-reference-gpu \
diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md b/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md
deleted file mode 100644
index c3e2b4f2c..000000000
--- a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md
+++ /dev/null
@@ -1,123 +0,0 @@
----
-orphan: true
----
-
-# Meta Reference Quantized Distribution
-
-```{toctree}
-:maxdepth: 2
-:hidden:
-
-self
-```
-
-The `llamastack/distribution-meta-reference-quantized-gpu` distribution consists of the following provider configurations:
-
-| API | Provider(s) |
-|-----|-------------|
-| agents | `inline::meta-reference` |
-| datasetio | `remote::huggingface`, `inline::localfs` |
-| eval | `inline::meta-reference` |
-| inference | `inline::meta-reference-quantized` |
-| safety | `inline::llama-guard` |
-| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
-| telemetry | `inline::meta-reference` |
-| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime`, `remote::model-context-protocol` |
-| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
-
-
-The only difference vs. the `meta-reference-gpu` distribution is that it has support for more efficient inference -- with fp8, int4 quantization, etc.
-
-Note that you need access to nvidia GPUs to run this distribution. This distribution is not compatible with CPU-only machines or machines with AMD GPUs.
-
-### Environment Variables
-
-The following environment variables can be configured:
-
-- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `8321`)
-- `INFERENCE_MODEL`: Inference model loaded into the Meta Reference server (default: `meta-llama/Llama-3.2-3B-Instruct`)
-- `INFERENCE_CHECKPOINT_DIR`: Directory containing the Meta Reference model checkpoint (default: `null`)
-
-
-## Prerequisite: Downloading Models
-
-Please use `llama model list --downloaded` to check that you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints.
- -``` -$ llama model list --downloaded -┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓ -┃ Model ┃ Size ┃ Modified Time ┃ -┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩ -│ Llama3.2-1B-Instruct:int4-qlora-eo8 │ 1.53 GB │ 2025-02-26 11:22:28 │ -├─────────────────────────────────────────┼──────────┼─────────────────────┤ -│ Llama3.2-1B │ 2.31 GB │ 2025-02-18 21:48:52 │ -├─────────────────────────────────────────┼──────────┼─────────────────────┤ -│ Prompt-Guard-86M │ 0.02 GB │ 2025-02-26 11:29:28 │ -├─────────────────────────────────────────┼──────────┼─────────────────────┤ -│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB │ 2025-02-26 11:37:41 │ -├─────────────────────────────────────────┼──────────┼─────────────────────┤ -│ Llama3.2-3B │ 5.99 GB │ 2025-02-18 21:51:26 │ -├─────────────────────────────────────────┼──────────┼─────────────────────┤ -│ Llama3.1-8B │ 14.97 GB │ 2025-02-16 10:36:37 │ -├─────────────────────────────────────────┼──────────┼─────────────────────┤ -│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB │ 2025-02-26 11:35:02 │ -├─────────────────────────────────────────┼──────────┼─────────────────────┤ -│ Llama-Guard-3-1B │ 2.80 GB │ 2025-02-26 11:20:46 │ -├─────────────────────────────────────────┼──────────┼─────────────────────┤ -│ Llama-Guard-3-1B:int4 │ 0.43 GB │ 2025-02-26 11:33:33 │ -└─────────────────────────────────────────┴──────────┴─────────────────────┘ -``` - -## Running the Distribution - -You can do this via Conda (build code) or Docker which has a pre-built image. - -### Via Docker - -This method allows you to get started quickly without having to build the distribution code. - -```bash -LLAMA_STACK_PORT=8321 -docker run \ - -it \ - --pull always \ - -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ - -v ~/.llama:/root/.llama \ - llamastack/distribution-meta-reference-quantized-gpu \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct -``` - -If you are using Llama Stack Safety / Shield APIs, use: - -```bash -docker run \ - -it \ - --pull always \ - -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ - -v ~/.llama:/root/.llama \ - llamastack/distribution-meta-reference-quantized-gpu \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ - --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B -``` - -### Via Conda - -Make sure you have done `uv pip install llama-stack` and have the Llama Stack CLI available. 
- -```bash -llama stack build --template meta-reference-quantized-gpu --image-type conda -llama stack run distributions/meta-reference-quantized-gpu/run.yaml \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct -``` - -If you are using Llama Stack Safety / Shield APIs, use: - -```bash -llama stack run distributions/meta-reference-quantized-gpu/run-with-safety.yaml \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ - --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B -``` diff --git a/docs/source/distributions/self_hosted_distro/nvidia.md b/docs/source/distributions/self_hosted_distro/nvidia.md index 58731392d..e84b5c525 100644 --- a/docs/source/distributions/self_hosted_distro/nvidia.md +++ b/docs/source/distributions/self_hosted_distro/nvidia.md @@ -6,8 +6,8 @@ The `llamastack/distribution-nvidia` distribution consists of the following prov | API | Provider(s) | |-----|-------------| | agents | `inline::meta-reference` | -| datasetio | `inline::localfs` | -| eval | `inline::meta-reference` | +| datasetio | `inline::localfs`, `remote::nvidia` | +| eval | `remote::nvidia` | | inference | `remote::nvidia` | | post_training | `remote::nvidia` | | safety | `remote::nvidia` | @@ -22,13 +22,13 @@ The `llamastack/distribution-nvidia` distribution consists of the following prov The following environment variables can be configured: - `NVIDIA_API_KEY`: NVIDIA API Key (default: ``) -- `NVIDIA_USER_ID`: NVIDIA User ID (default: `llama-stack-user`) +- `NVIDIA_APPEND_API_VERSION`: Whether to append the API version to the base_url (default: `True`) - `NVIDIA_DATASET_NAMESPACE`: NVIDIA Dataset Namespace (default: `default`) -- `NVIDIA_ACCESS_POLICIES`: NVIDIA Access Policies (default: `{}`) - `NVIDIA_PROJECT_ID`: NVIDIA Project ID (default: `test-project`) - `NVIDIA_CUSTOMIZER_URL`: NVIDIA Customizer URL (default: `https://customizer.api.nvidia.com`) - `NVIDIA_OUTPUT_MODEL_DIR`: NVIDIA Output Model Directory (default: `test-example-model@v1`) - `GUARDRAILS_SERVICE_URL`: URL for the NeMo Guardrails Service (default: `http://0.0.0.0:7331`) +- `NVIDIA_EVALUATOR_URL`: URL for the NeMo Evaluator Service (default: `http://0.0.0.0:7331`) - `INFERENCE_MODEL`: Inference model (default: `Llama3.1-8B-Instruct`) - `SAFETY_MODEL`: Name of the model to use for safety (default: `meta/llama-3.1-8b-instruct`) @@ -45,20 +45,91 @@ The following models are available by default: - `meta/llama-3.2-3b-instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)` - `meta/llama-3.2-11b-vision-instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)` - `meta/llama-3.2-90b-vision-instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)` +- `meta/llama-3.3-70b-instruct (aliases: meta-llama/Llama-3.3-70B-Instruct)` - `nvidia/llama-3.2-nv-embedqa-1b-v2 ` - `nvidia/nv-embedqa-e5-v5 ` - `nvidia/nv-embedqa-mistral-7b-v2 ` - `snowflake/arctic-embed-l ` -### Prerequisite: API Keys +## Prerequisites +### NVIDIA API Keys -Make sure you have access to a NVIDIA API Key. You can get one by visiting [https://build.nvidia.com/](https://build.nvidia.com/). +Make sure you have access to a NVIDIA API Key. You can get one by visiting [https://build.nvidia.com/](https://build.nvidia.com/). Use this key for the `NVIDIA_API_KEY` environment variable. +### Deploy NeMo Microservices Platform +The NVIDIA NeMo microservices platform supports end-to-end microservice deployment of a complete AI flywheel on your Kubernetes cluster through the NeMo Microservices Helm Chart. 
Please reference the [NVIDIA NeMo Microservices documentation](https://docs.nvidia.com/nemo/microservices/latest/about/index.html) for platform prerequisites and instructions to install and deploy the platform.
+
+## Supported Services
+Each Llama Stack API corresponds to a specific NeMo microservice. The core microservices (Customizer, Evaluator, Guardrails) are exposed by the same endpoint. The platform components (Data Store) are each exposed by separate endpoints.
+
+### Inference: NVIDIA NIM
+NVIDIA NIM is used for running inference with registered models. There are two ways to access NVIDIA NIMs:
+ 1. Hosted (default): Preview APIs hosted at https://integrate.api.nvidia.com (requires an API key)
+ 2. Self-hosted: NVIDIA NIMs that run on your own infrastructure.
+
+The deployed platform includes the NIM Proxy microservice, which is the service that provides access to your NIMs (for example, to run inference on a model). Set the `NVIDIA_BASE_URL` environment variable to use your NVIDIA NIM Proxy deployment.
+
+### Datasetio API: NeMo Data Store
+The NeMo Data Store microservice serves as the default file storage solution for the NeMo microservices platform. It exposes APIs compatible with the Hugging Face Hub client (`HfApi`), so you can use the client to interact with the Data Store. The `NVIDIA_DATASETS_URL` environment variable should point to your NeMo Data Store endpoint.
+
+See the [NVIDIA Datasetio docs](/llama_stack/providers/remote/datasetio/nvidia/README.md) for supported features and example usage.
+
+### Eval API: NeMo Evaluator
+The NeMo Evaluator microservice supports evaluation of LLMs. Launching an Evaluation job with NeMo Evaluator requires an Evaluation Config (an object that contains metadata needed by the job). A Llama Stack Benchmark maps to an Evaluation Config, so registering a Benchmark creates an Evaluation Config in NeMo Evaluator. The `NVIDIA_EVALUATOR_URL` environment variable should point to your NeMo Microservices endpoint.
+
+See the [NVIDIA Eval docs](/llama_stack/providers/remote/eval/nvidia/README.md) for supported features and example usage.
+
+### Post-Training API: NeMo Customizer
+The NeMo Customizer microservice supports fine-tuning models. You can reference [this list of supported models](/llama_stack/providers/remote/post_training/nvidia/models.py) that can be fine-tuned using Llama Stack. The `NVIDIA_CUSTOMIZER_URL` environment variable should point to your NeMo Microservices endpoint.
+
+See the [NVIDIA Post-Training docs](/llama_stack/providers/remote/post_training/nvidia/README.md) for supported features and example usage.
+
+### Safety API: NeMo Guardrails
+The NeMo Guardrails microservice sits between your application and the LLM, and adds checks and content moderation to a model. The `GUARDRAILS_SERVICE_URL` environment variable should point to your NeMo Microservices endpoint.
+
+See the NVIDIA Safety docs for supported features and example usage.
+
+## Deploying models
+In order to use a registered model with the Llama Stack APIs, ensure the corresponding NIM is deployed to your environment. For example, you can use the NIM Proxy microservice to deploy `meta/llama-3.2-1b-instruct`.
+
+Note: For improved inference speeds, use NIM with the `fast_outlines` guided decoding system (specified in the request body). This is the default if you deployed the platform with the NeMo Microservices Helm Chart.
+```sh +# URL to NeMo NIM Proxy service +export NEMO_URL="http://nemo.test" + +curl --location "$NEMO_URL/v1/deployment/model-deployments" \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "name": "llama-3.2-1b-instruct", + "namespace": "meta", + "config": { + "model": "meta/llama-3.2-1b-instruct", + "nim_deployment": { + "image_name": "nvcr.io/nim/meta/llama-3.2-1b-instruct", + "image_tag": "1.8.3", + "pvc_size": "25Gi", + "gpu": 1, + "additional_envs": { + "NIM_GUIDED_DECODING_BACKEND": "fast_outlines" + } + } + } + }' +``` +This NIM deployment should take approximately 10 minutes to go live. [See the docs](https://docs.nvidia.com/nemo/microservices/latest/get-started/tutorials/deploy-nims.html) for more information on how to deploy a NIM and verify it's available for inference. + +You can also remove a deployed NIM to free up GPU resources, if needed. +```sh +export NEMO_URL="http://nemo.test" + +curl -X DELETE "$NEMO_URL/v1/deployment/model-deployments/meta/llama-3.1-8b-instruct" +``` ## Running Llama Stack with NVIDIA -You can do this via Conda (build code) or Docker which has a pre-built image. +You can do this via Conda or venv (build code), or Docker which has a pre-built image. ### Via Docker @@ -72,7 +143,7 @@ docker run \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ./run.yaml:/root/my-run.yaml \ llamastack/distribution-nvidia \ - --yaml-config /root/my-run.yaml \ + --config /root/my-run.yaml \ --port $LLAMA_STACK_PORT \ --env NVIDIA_API_KEY=$NVIDIA_API_KEY ``` @@ -80,9 +151,23 @@ docker run \ ### Via Conda ```bash +INFERENCE_MODEL=meta-llama/Llama-3.1-8b-Instruct llama stack build --template nvidia --image-type conda llama stack run ./run.yaml \ --port 8321 \ - --env NVIDIA_API_KEY=$NVIDIA_API_KEY + --env NVIDIA_API_KEY=$NVIDIA_API_KEY \ + --env INFERENCE_MODEL=$INFERENCE_MODEL +``` + +### Via venv + +If you've set up your local development environment, you can also build the image using your local virtual environment. 
+ +```bash +INFERENCE_MODEL=meta-llama/Llama-3.1-8b-Instruct +llama stack build --template nvidia --image-type venv +llama stack run ./run.yaml \ + --port 8321 \ + --env NVIDIA_API_KEY=$NVIDIA_API_KEY \ --env INFERENCE_MODEL=$INFERENCE_MODEL ``` diff --git a/docs/source/distributions/self_hosted_distro/ollama.md b/docs/source/distributions/self_hosted_distro/ollama.md index 2358a52a7..4d148feda 100644 --- a/docs/source/distributions/self_hosted_distro/ollama.md +++ b/docs/source/distributions/self_hosted_distro/ollama.md @@ -19,10 +19,11 @@ The `llamastack/distribution-ollama` distribution consists of the following prov | datasetio | `remote::huggingface`, `inline::localfs` | | eval | `inline::meta-reference` | | inference | `remote::ollama` | +| post_training | `inline::huggingface` | | safety | `inline::llama-guard` | | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | | telemetry | `inline::meta-reference` | -| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime`, `remote::model-context-protocol`, `remote::wolfram-alpha` | +| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::rag-runtime`, `remote::model-context-protocol`, `remote::wolfram-alpha` | | vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` | @@ -97,7 +98,7 @@ docker run \ -v ~/.llama:/root/.llama \ -v ./llama_stack/templates/ollama/run-with-safety.yaml:/root/my-run.yaml \ llamastack/distribution-ollama \ - --yaml-config /root/my-run.yaml \ + --config /root/my-run.yaml \ --port $LLAMA_STACK_PORT \ --env INFERENCE_MODEL=$INFERENCE_MODEL \ --env SAFETY_MODEL=$SAFETY_MODEL \ diff --git a/docs/source/distributions/self_hosted_distro/passthrough.md b/docs/source/distributions/self_hosted_distro/passthrough.md index 04fc9d927..39f076be4 100644 --- a/docs/source/distributions/self_hosted_distro/passthrough.md +++ b/docs/source/distributions/self_hosted_distro/passthrough.md @@ -22,7 +22,7 @@ The `llamastack/distribution-passthrough` distribution consists of the following | safety | `inline::llama-guard` | | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | | telemetry | `inline::meta-reference` | -| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `remote::wolfram-alpha`, `inline::code-interpreter`, `inline::rag-runtime`, `remote::model-context-protocol` | +| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `remote::wolfram-alpha`, `inline::rag-runtime`, `remote::model-context-protocol` | | vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` | diff --git a/docs/source/distributions/self_hosted_distro/remote-vllm.md b/docs/source/distributions/self_hosted_distro/remote-vllm.md index e18b5bf40..6e7cf410d 100644 --- a/docs/source/distributions/self_hosted_distro/remote-vllm.md +++ b/docs/source/distributions/self_hosted_distro/remote-vllm.md @@ -21,7 +21,7 @@ The `llamastack/distribution-remote-vllm` distribution consists of the following | safety | `inline::llama-guard` | | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | | telemetry | `inline::meta-reference` | -| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime`, `remote::model-context-protocol`, `remote::wolfram-alpha` | +| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::rag-runtime`, `remote::model-context-protocol`, `remote::wolfram-alpha` | | vector_io | `inline::faiss`, 
`remote::chromadb`, `remote::pgvector` |
@@ -41,10 +41,10 @@ The following environment variables can be configured:
 
 ## Setting up vLLM server
 
-In the following sections, we'll use either AMD and NVIDIA GPUs to serve as hardware accelerators for the vLLM
+In the following sections, we'll use AMD, NVIDIA, or Intel GPUs to serve as hardware accelerators for the vLLM
 server, which acts as both the LLM inference provider and the safety provider. Note that vLLM also
 [supports many other hardware accelerators](https://docs.vllm.ai/en/latest/getting_started/installation.html) and
-that we only use GPUs here for demonstration purposes.
+that we only use GPUs here for demonstration purposes. If you run into issues, you can add `--env VLLM_DEBUG_LOG_API_SERVER_RESPONSE=true` (available in vLLM v0.8.3 and above) to the `docker run` command to log API server responses for debugging.
 
 ### Setting up vLLM server on AMD GPU
 
@@ -162,6 +162,55 @@ docker run \
     --port $SAFETY_PORT
 ```
+### Setting up vLLM server on Intel GPU
+
+Refer to [vLLM Documentation for XPU](https://docs.vllm.ai/en/v0.8.2/getting_started/installation/gpu.html?device=xpu) to get a vLLM endpoint. In addition to the vLLM-side setup, which covers installing vLLM from source or building your own vLLM Docker container, Intel provides a prebuilt vLLM container for systems with Intel GPUs supported by the PyTorch XPU backend:
+- [intel/vllm](https://hub.docker.com/r/intel/vllm)
+
+Here is a sample script to start a vLLM server locally via Docker using the Intel-provided container:
+
+```bash
+export INFERENCE_PORT=8000
+export INFERENCE_MODEL=meta-llama/Llama-3.2-1B-Instruct
+export ZE_AFFINITY_MASK=0
+
+docker run \
+    --pull always \
+    --device /dev/dri \
+    -v /dev/dri/by-path:/dev/dri/by-path \
+    -v ~/.cache/huggingface:/root/.cache/huggingface \
+    --env "HUGGING_FACE_HUB_TOKEN=$HF_TOKEN" \
+    --env ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK \
+    -p $INFERENCE_PORT:$INFERENCE_PORT \
+    --ipc=host \
+    intel/vllm:xpu \
+    --gpu-memory-utilization 0.7 \
+    --model $INFERENCE_MODEL \
+    --port $INFERENCE_PORT
+```
+
+If you are using Llama Stack Safety / Shield APIs, you will also need to run another vLLM instance with a corresponding safety model such as `meta-llama/Llama-Guard-3-1B`, using a script like:
+
+```bash
+export SAFETY_PORT=8081
+export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
+export ZE_AFFINITY_MASK=1
+
+docker run \
+    --pull always \
+    --device /dev/dri \
+    -v /dev/dri/by-path:/dev/dri/by-path \
+    -v ~/.cache/huggingface:/root/.cache/huggingface \
+    --env "HUGGING_FACE_HUB_TOKEN=$HF_TOKEN" \
+    --env ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK \
+    -p $SAFETY_PORT:$SAFETY_PORT \
+    --ipc=host \
+    intel/vllm:xpu \
+    --gpu-memory-utilization 0.7 \
+    --model $SAFETY_MODEL \
+    --port $SAFETY_PORT
+```
+
 ## Running Llama Stack
 
 Now you are ready to run Llama Stack with vLLM as the inference provider. You can do this via Conda (build code) or Docker which has a pre-built image.
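Before wiring this endpoint into Llama Stack, it can help to confirm that the vLLM server started in the sections above is actually answering. The snippet below is a minimal sketch, not part of the upstream guide: it assumes the server is listening locally on `$INFERENCE_PORT` (8000 in the examples above), exposes vLLM's standard OpenAI-compatible routes, and that the `requests` package is available.

```python
import os

import requests

# Base URL of the vLLM OpenAI-compatible server started above.
base_url = f"http://localhost:{os.environ.get('INFERENCE_PORT', '8000')}/v1"

# /v1/models lists the model(s) this vLLM instance is serving.
models = requests.get(f"{base_url}/models", timeout=10).json()
print([m["id"] for m in models["data"]])

# /v1/completions exercises the inference path end to end.
resp = requests.post(
    f"{base_url}/completions",
    json={"model": models["data"][0]["id"], "prompt": "Hello", "max_tokens": 8},
    timeout=60,
)
print(resp.json()["choices"][0]["text"])
```

If both calls succeed, the `VLLM_URL` you pass to the stack below should work as well.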
@@ -184,7 +233,7 @@ docker run \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ./llama_stack/templates/remote-vllm/run.yaml:/root/my-run.yaml \ llamastack/distribution-remote-vllm \ - --yaml-config /root/my-run.yaml \ + --config /root/my-run.yaml \ --port $LLAMA_STACK_PORT \ --env INFERENCE_MODEL=$INFERENCE_MODEL \ --env VLLM_URL=http://host.docker.internal:$INFERENCE_PORT/v1 @@ -206,7 +255,7 @@ docker run \ -v ~/.llama:/root/.llama \ -v ./llama_stack/templates/remote-vllm/run-with-safety.yaml:/root/my-run.yaml \ llamastack/distribution-remote-vllm \ - --yaml-config /root/my-run.yaml \ + --config /root/my-run.yaml \ --port $LLAMA_STACK_PORT \ --env INFERENCE_MODEL=$INFERENCE_MODEL \ --env VLLM_URL=http://host.docker.internal:$INFERENCE_PORT/v1 \ diff --git a/docs/source/distributions/self_hosted_distro/sambanova.md b/docs/source/distributions/self_hosted_distro/sambanova.md index 76b976d78..bb4842362 100644 --- a/docs/source/distributions/self_hosted_distro/sambanova.md +++ b/docs/source/distributions/self_hosted_distro/sambanova.md @@ -16,10 +16,10 @@ The `llamastack/distribution-sambanova` distribution consists of the following p | API | Provider(s) | |-----|-------------| | agents | `inline::meta-reference` | -| inference | `remote::sambanova` | -| safety | `inline::llama-guard` | +| inference | `remote::sambanova`, `inline::sentence-transformers` | +| safety | `remote::sambanova` | | telemetry | `inline::meta-reference` | -| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime` | +| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::rag-runtime`, `remote::model-context-protocol`, `remote::wolfram-alpha` | | vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` | @@ -28,53 +28,64 @@ The `llamastack/distribution-sambanova` distribution consists of the following p The following environment variables can be configured: - `LLAMASTACK_PORT`: Port for the Llama Stack distribution server (default: `8321`) -- `SAMBANOVA_API_KEY`: SambaNova.AI API Key (default: ``) +- `SAMBANOVA_API_KEY`: SambaNova API Key (default: ``) ### Models The following models are available by default: -- `Meta-Llama-3.1-8B-Instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)` -- `Meta-Llama-3.1-70B-Instruct (aliases: meta-llama/Llama-3.1-70B-Instruct)` -- `Meta-Llama-3.1-405B-Instruct (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)` -- `Meta-Llama-3.2-1B-Instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)` -- `Meta-Llama-3.2-3B-Instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)` -- `Meta-Llama-3.3-70B-Instruct (aliases: meta-llama/Llama-3.3-70B-Instruct)` -- `Llama-3.2-11B-Vision-Instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)` -- `Llama-3.2-90B-Vision-Instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)` -- `Meta-Llama-Guard-3-8B (aliases: meta-llama/Llama-Guard-3-8B)` -- `Llama-4-Scout-17B-16E-Instruct (aliases: meta-llama/Llama-4-Scout-17B-16E-Instruct)` +- `sambanova/Meta-Llama-3.1-8B-Instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)` +- `sambanova/Meta-Llama-3.1-405B-Instruct (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)` +- `sambanova/Meta-Llama-3.2-1B-Instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)` +- `sambanova/Meta-Llama-3.2-3B-Instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)` +- `sambanova/Meta-Llama-3.3-70B-Instruct (aliases: meta-llama/Llama-3.3-70B-Instruct)` +- `sambanova/Llama-3.2-11B-Vision-Instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)` +- 
`sambanova/Llama-3.2-90B-Vision-Instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)` +- `sambanova/Llama-4-Scout-17B-16E-Instruct (aliases: meta-llama/Llama-4-Scout-17B-16E-Instruct)` +- `sambanova/Llama-4-Maverick-17B-128E-Instruct (aliases: meta-llama/Llama-4-Maverick-17B-128E-Instruct)` +- `sambanova/Meta-Llama-Guard-3-8B (aliases: meta-llama/Llama-Guard-3-8B)` ### Prerequisite: API Keys -Make sure you have access to a SambaNova API Key. You can get one by visiting [SambaNova.ai](https://sambanova.ai/). +Make sure you have access to a SambaNova API Key. You can get one by visiting [SambaNova.ai](http://cloud.sambanova.ai?utm_source=llamastack&utm_medium=external&utm_campaign=cloud_signup). ## Running Llama Stack with SambaNova You can do this via Conda (build code) or Docker which has a pre-built image. -### Via Docker -This method allows you to get started quickly without having to build the distribution code. +### Via Docker ```bash LLAMA_STACK_PORT=8321 +llama stack build --template sambanova --image-type container docker run \ -it \ - --pull always \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ - llamastack/distribution-sambanova \ + -v ~/.llama:/root/.llama \ + distribution-sambanova \ --port $LLAMA_STACK_PORT \ --env SAMBANOVA_API_KEY=$SAMBANOVA_API_KEY ``` + +### Via Venv + +```bash +llama stack build --template sambanova --image-type venv +llama stack run --image-type venv ~/.llama/distributions/sambanova/sambanova-run.yaml \ + --port $LLAMA_STACK_PORT \ + --env SAMBANOVA_API_KEY=$SAMBANOVA_API_KEY +``` + + ### Via Conda ```bash llama stack build --template sambanova --image-type conda -llama stack run ./run.yaml \ +llama stack run --image-type conda ~/.llama/distributions/sambanova/sambanova-run.yaml \ --port $LLAMA_STACK_PORT \ --env SAMBANOVA_API_KEY=$SAMBANOVA_API_KEY ``` diff --git a/docs/source/distributions/self_hosted_distro/tgi.md b/docs/source/distributions/self_hosted_distro/tgi.md index f6b14b064..24f9d03ec 100644 --- a/docs/source/distributions/self_hosted_distro/tgi.md +++ b/docs/source/distributions/self_hosted_distro/tgi.md @@ -23,7 +23,7 @@ The `llamastack/distribution-tgi` distribution consists of the following provide | safety | `inline::llama-guard` | | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | | telemetry | `inline::meta-reference` | -| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime`, `remote::model-context-protocol` | +| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::rag-runtime`, `remote::model-context-protocol` | | vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` | @@ -117,7 +117,7 @@ docker run \ -v ~/.llama:/root/.llama \ -v ./llama_stack/templates/tgi/run-with-safety.yaml:/root/my-run.yaml \ llamastack/distribution-tgi \ - --yaml-config /root/my-run.yaml \ + --config /root/my-run.yaml \ --port $LLAMA_STACK_PORT \ --env INFERENCE_MODEL=$INFERENCE_MODEL \ --env TGI_URL=http://host.docker.internal:$INFERENCE_PORT \ diff --git a/docs/source/distributions/self_hosted_distro/together.md b/docs/source/distributions/self_hosted_distro/together.md index 3ebb1f59e..adfc2c472 100644 --- a/docs/source/distributions/self_hosted_distro/together.md +++ b/docs/source/distributions/self_hosted_distro/together.md @@ -22,7 +22,7 @@ The `llamastack/distribution-together` distribution consists of the following pr | safety | `inline::llama-guard` | | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | | 
telemetry | `inline::meta-reference` | -| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime`, `remote::model-context-protocol`, `remote::wolfram-alpha` | +| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::rag-runtime`, `remote::model-context-protocol`, `remote::wolfram-alpha` | | vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` | diff --git a/docs/source/getting_started/detailed_tutorial.md b/docs/source/getting_started/detailed_tutorial.md index a1504f249..e40a4903a 100644 --- a/docs/source/getting_started/detailed_tutorial.md +++ b/docs/source/getting_started/detailed_tutorial.md @@ -42,7 +42,7 @@ powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | ie Setup your virtual environment. ```bash -uv venv --python 3.10 +uv sync --python 3.10 source .venv/bin/activate ``` ## Step 2: Run Llama Stack @@ -445,7 +445,6 @@ from llama_stack_client import LlamaStackClient from llama_stack_client import Agent, AgentEventLogger from llama_stack_client.types import Document import uuid -from termcolor import cprint client = LlamaStackClient(base_url="http://localhost:8321") @@ -463,7 +462,6 @@ urls = [ "memory_optimizations.rst", "chat.rst", "llama3.rst", - "datasets.rst", "qat_finetune.rst", "lora_finetune.rst", ] diff --git a/docs/source/providers/external.md b/docs/source/providers/external.md index 90fc77979..55211ac5f 100644 --- a/docs/source/providers/external.md +++ b/docs/source/providers/external.md @@ -10,7 +10,7 @@ Llama Stack supports external providers that live outside of the main codebase. To enable external providers, you need to configure the `external_providers_dir` in your Llama Stack configuration. This directory should contain your external provider specifications: ```yaml -external_providers_dir: /etc/llama-stack/providers.d/ +external_providers_dir: ~/.llama/providers.d/ ``` ## Directory Structure @@ -50,9 +50,12 @@ Llama Stack supports two types of external providers: Here's a list of known external providers that you can use with Llama Stack: -| Type | Name | Description | Repository | -|------|------|-------------|------------| -| Remote | KubeFlow Training | Train models with KubeFlow | [llama-stack-provider-kft](https://github.com/opendatahub-io/llama-stack-provider-kft) | +| Name | Description | API | Type | Repository | +|------|-------------|-----|------|------------| +| KubeFlow Training | Train models with KubeFlow | Post Training | Remote | [llama-stack-provider-kft](https://github.com/opendatahub-io/llama-stack-provider-kft) | +| KubeFlow Pipelines | Train models with KubeFlow Pipelines | Post Training | Inline **and** Remote | [llama-stack-provider-kfp-trainer](https://github.com/opendatahub-io/llama-stack-provider-kfp-trainer) | +| RamaLama | Inference models with RamaLama | Inference | Remote | [ramalama-stack](https://github.com/containers/ramalama-stack) | +| TrustyAI LM-Eval | Evaluate models with TrustyAI LM-Eval | Eval | Remote | [llama-stack-provider-lmeval](https://github.com/trustyai-explainability/llama-stack-provider-lmeval) | ### Remote Provider Specification @@ -179,7 +182,7 @@ dependencies = ["llama-stack", "pydantic", "ollama", "aiohttp"] 3. Create the provider specification: ```yaml -# /etc/llama-stack/providers.d/remote/inference/custom_ollama.yaml +# ~/.llama/providers.d/remote/inference/custom_ollama.yaml adapter: adapter_type: custom_ollama pip_packages: ["ollama", "aiohttp"] @@ -198,7 +201,7 @@ uv pip install -e . 5. 
Configure Llama Stack to use external providers: ```yaml -external_providers_dir: /etc/llama-stack/providers.d/ +external_providers_dir: ~/.llama/providers.d/ ``` The provider will now be available in Llama Stack with the type `remote::custom_ollama`. diff --git a/docs/source/providers/index.md b/docs/source/providers/index.md index 1d1a6e081..1f5026479 100644 --- a/docs/source/providers/index.md +++ b/docs/source/providers/index.md @@ -30,6 +30,18 @@ Runs inference with an LLM. ## Post Training Fine-tunes a model. +#### Post Training Providers +The following providers are available for Post Training: + +```{toctree} +:maxdepth: 1 + +external +post_training/huggingface +post_training/torchtune +post_training/nvidia_nemo +``` + ## Safety Applies safety policies to the output at a Systems (not only model) level. diff --git a/docs/source/providers/post_training/huggingface.md b/docs/source/providers/post_training/huggingface.md new file mode 100644 index 000000000..c342203a8 --- /dev/null +++ b/docs/source/providers/post_training/huggingface.md @@ -0,0 +1,122 @@ +--- +orphan: true +--- +# HuggingFace SFTTrainer + +[HuggingFace SFTTrainer](https://huggingface.co/docs/trl/en/sft_trainer) is an inline post training provider for Llama Stack. It allows you to run supervised fine tuning on a variety of models using many datasets + +## Features + +- Simple access through the post_training API +- Fully integrated with Llama Stack +- GPU support, CPU support, and MPS support (MacOS Metal Performance Shaders) + +## Usage + +To use the HF SFTTrainer in your Llama Stack project, follow these steps: + +1. Configure your Llama Stack project to use this provider. +2. Kick off a SFT job using the Llama Stack post_training API. + +## Setup + +You can access the HuggingFace trainer via the `ollama` distribution: + +```bash +llama stack build --template ollama --image-type venv +llama stack run --image-type venv ~/.llama/distributions/ollama/ollama-run.yaml +``` + +## Run Training + +You can access the provider and the `supervised_fine_tune` method via the post_training API: + +```python +import time +import uuid + + +from llama_stack_client.types import ( + post_training_supervised_fine_tune_params, + algorithm_config_param, +) + + +def create_http_client(): + from llama_stack_client import LlamaStackClient + + return LlamaStackClient(base_url="http://localhost:8321") + + +client = create_http_client() + +# Example Dataset +client.datasets.register( + purpose="post-training/messages", + source={ + "type": "uri", + "uri": "huggingface://datasets/llamastack/simpleqa?split=train", + }, + dataset_id="simpleqa", +) + +training_config = post_training_supervised_fine_tune_params.TrainingConfig( + data_config=post_training_supervised_fine_tune_params.TrainingConfigDataConfig( + batch_size=32, + data_format="instruct", + dataset_id="simpleqa", + shuffle=True, + ), + gradient_accumulation_steps=1, + max_steps_per_epoch=0, + max_validation_steps=1, + n_epochs=4, +) + +algorithm_config = algorithm_config_param.LoraFinetuningConfig( # this config is also currently mandatory but should not be + alpha=1, + apply_lora_to_mlp=True, + apply_lora_to_output=False, + lora_attn_modules=["q_proj"], + rank=1, + type="LoRA", +) + +job_uuid = f"test-job{uuid.uuid4()}" + +# Example Model +training_model = "ibm-granite/granite-3.3-8b-instruct" + +start_time = time.time() +response = client.post_training.supervised_fine_tune( + job_uuid=job_uuid, + logger_config={}, + model=training_model, + hyperparam_search_config={}, + 
training_config=training_config, + algorithm_config=algorithm_config, + checkpoint_dir="output", +) +print("Job: ", job_uuid) + + +# Wait for the job to complete! +while True: + status = client.post_training.job.status(job_uuid=job_uuid) + if not status: + print("Job not found") + break + + print(status) + if status.status == "completed": + break + + print("Waiting for job to complete...") + time.sleep(5) + +end_time = time.time() +print("Job completed in", end_time - start_time, "seconds!") + +print("Artifacts:") +print(client.post_training.job.artifacts(job_uuid=job_uuid)) +``` diff --git a/docs/source/providers/post_training/nvidia_nemo.md b/docs/source/providers/post_training/nvidia_nemo.md new file mode 100644 index 000000000..1a7adbe16 --- /dev/null +++ b/docs/source/providers/post_training/nvidia_nemo.md @@ -0,0 +1,163 @@ +--- +orphan: true +--- +# NVIDIA NEMO + +[NVIDIA NEMO](https://developer.nvidia.com/nemo-framework) is a remote post training provider for Llama Stack. It provides enterprise-grade fine-tuning capabilities through NVIDIA's NeMo Customizer service. + +## Features + +- Enterprise-grade fine-tuning capabilities +- Support for LoRA and SFT fine-tuning +- Integration with NVIDIA's NeMo Customizer service +- Support for various NVIDIA-optimized models +- Efficient training with NVIDIA hardware acceleration + +## Usage + +To use NVIDIA NEMO in your Llama Stack project, follow these steps: + +1. Configure your Llama Stack project to use this provider. +2. Set up your NVIDIA API credentials. +3. Kick off a fine-tuning job using the Llama Stack post_training API. + +## Setup + +You'll need to set the following environment variables: + +```bash +export NVIDIA_API_KEY="your-api-key" +export NVIDIA_DATASET_NAMESPACE="default" +export NVIDIA_CUSTOMIZER_URL="your-customizer-url" +export NVIDIA_PROJECT_ID="your-project-id" +export NVIDIA_OUTPUT_MODEL_DIR="your-output-model-dir" +``` + +## Run Training + +You can access the provider and the `supervised_fine_tune` method via the post_training API: + +```python +import time +import uuid + +from llama_stack_client.types import ( + post_training_supervised_fine_tune_params, + algorithm_config_param, +) + + +def create_http_client(): + from llama_stack_client import LlamaStackClient + + return LlamaStackClient(base_url="http://localhost:8321") + + +client = create_http_client() + +# Example Dataset +client.datasets.register( + purpose="post-training/messages", + source={ + "type": "uri", + "uri": "huggingface://datasets/llamastack/simpleqa?split=train", + }, + dataset_id="simpleqa", +) + +training_config = post_training_supervised_fine_tune_params.TrainingConfig( + data_config=post_training_supervised_fine_tune_params.TrainingConfigDataConfig( + batch_size=8, # Default batch size for NEMO + data_format="instruct", + dataset_id="simpleqa", + shuffle=True, + ), + n_epochs=50, # Default epochs for NEMO + optimizer_config=post_training_supervised_fine_tune_params.TrainingConfigOptimizerConfig( + lr=0.0001, # Default learning rate + weight_decay=0.01, # NEMO-specific parameter + ), + # NEMO-specific parameters + log_every_n_steps=None, + val_check_interval=0.25, + sequence_packing_enabled=False, + hidden_dropout=None, + attention_dropout=None, + ffn_dropout=None, +) + +algorithm_config = algorithm_config_param.LoraFinetuningConfig( + alpha=16, # Default alpha for NEMO + type="LoRA", +) + +job_uuid = f"test-job{uuid.uuid4()}" + +# Example Model - must be a supported NEMO model +training_model = "meta/llama-3.1-8b-instruct" + +start_time = 
time.time() +response = client.post_training.supervised_fine_tune( + job_uuid=job_uuid, + logger_config={}, + model=training_model, + hyperparam_search_config={}, + training_config=training_config, + algorithm_config=algorithm_config, + checkpoint_dir="output", +) +print("Job: ", job_uuid) + +# Wait for the job to complete! +while True: + status = client.post_training.job.status(job_uuid=job_uuid) + if not status: + print("Job not found") + break + + print(status) + if status.status == "completed": + break + + print("Waiting for job to complete...") + time.sleep(5) + +end_time = time.time() +print("Job completed in", end_time - start_time, "seconds!") + +print("Artifacts:") +print(client.post_training.job.artifacts(job_uuid=job_uuid)) +``` + +## Supported Models + +Currently supports the following models: +- meta/llama-3.1-8b-instruct +- meta/llama-3.2-1b-instruct + +## Supported Parameters + +### TrainingConfig +- n_epochs (default: 50) +- data_config +- optimizer_config +- log_every_n_steps +- val_check_interval (default: 0.25) +- sequence_packing_enabled (default: False) +- hidden_dropout (0.0-1.0) +- attention_dropout (0.0-1.0) +- ffn_dropout (0.0-1.0) + +### DataConfig +- dataset_id +- batch_size (default: 8) + +### OptimizerConfig +- lr (default: 0.0001) +- weight_decay (default: 0.01) + +### LoRA Config +- alpha (default: 16) +- type (must be "LoRA") + +Note: Some parameters from the standard Llama Stack API are not supported and will be ignored with a warning. diff --git a/docs/source/providers/post_training/torchtune.md b/docs/source/providers/post_training/torchtune.md new file mode 100644 index 000000000..ef72505b1 --- /dev/null +++ b/docs/source/providers/post_training/torchtune.md @@ -0,0 +1,125 @@ +--- +orphan: true +--- +# TorchTune + +[TorchTune](https://github.com/pytorch/torchtune) is an inline post training provider for Llama Stack. It provides a simple and efficient way to fine-tune language models using PyTorch. + +## Features + +- Simple access through the post_training API +- Fully integrated with Llama Stack +- GPU support and single device capabilities. +- Support for LoRA + +## Usage + +To use TorchTune in your Llama Stack project, follow these steps: + +1. Configure your Llama Stack project to use this provider. +2. Kick off a fine-tuning job using the Llama Stack post_training API. + +## Setup + +You can access the TorchTune trainer by writing your own yaml pointing to the provider: + +```yaml +post_training: + - provider_id: torchtune + provider_type: inline::torchtune + config: {} +``` + +you can then build and run your own stack with this provider. 
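Once a stack built from this configuration is running, a quick sanity check before submitting jobs is to list the registered providers from the client and look for the torchtune entry. This is a minimal sketch, assuming a server reachable at `localhost:8321` and a `llama-stack-client` version that exposes `providers.list()`:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

# Print the providers registered under the post_training API.
for provider in client.providers.list():
    if provider.api == "post_training":
        print(provider.provider_id, provider.provider_type)
```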
+ +## Run Training + +You can access the provider and the `supervised_fine_tune` method via the post_training API: + +```python +import time +import uuid + +from llama_stack_client.types import ( + post_training_supervised_fine_tune_params, + algorithm_config_param, +) + + +def create_http_client(): + from llama_stack_client import LlamaStackClient + + return LlamaStackClient(base_url="http://localhost:8321") + + +client = create_http_client() + +# Example Dataset +client.datasets.register( + purpose="post-training/messages", + source={ + "type": "uri", + "uri": "huggingface://datasets/llamastack/simpleqa?split=train", + }, + dataset_id="simpleqa", +) + +training_config = post_training_supervised_fine_tune_params.TrainingConfig( + data_config=post_training_supervised_fine_tune_params.TrainingConfigDataConfig( + batch_size=32, + data_format="instruct", + dataset_id="simpleqa", + shuffle=True, + ), + gradient_accumulation_steps=1, + max_steps_per_epoch=0, + max_validation_steps=1, + n_epochs=4, +) + +algorithm_config = algorithm_config_param.LoraFinetuningConfig( + alpha=1, + apply_lora_to_mlp=True, + apply_lora_to_output=False, + lora_attn_modules=["q_proj"], + rank=1, + type="LoRA", +) + +job_uuid = f"test-job{uuid.uuid4()}" + +# Example Model +training_model = "meta-llama/Llama-2-7b-hf" + +start_time = time.time() +response = client.post_training.supervised_fine_tune( + job_uuid=job_uuid, + logger_config={}, + model=training_model, + hyperparam_search_config={}, + training_config=training_config, + algorithm_config=algorithm_config, + checkpoint_dir="output", +) +print("Job: ", job_uuid) + +# Wait for the job to complete! +while True: + status = client.post_training.job.status(job_uuid=job_uuid) + if not status: + print("Job not found") + break + + print(status) + if status.status == "completed": + break + + print("Waiting for job to complete...") + time.sleep(5) + +end_time = time.time() +print("Job completed in", end_time - start_time, "seconds!") + +print("Artifacts:") +print(client.post_training.job.artifacts(job_uuid=job_uuid)) +``` diff --git a/docs/source/providers/vector_io/milvus.md b/docs/source/providers/vector_io/milvus.md new file mode 100644 index 000000000..e030c85f8 --- /dev/null +++ b/docs/source/providers/vector_io/milvus.md @@ -0,0 +1,107 @@ +--- +orphan: true +--- +# Milvus + +[Milvus](https://milvus.io/) is an inline and remote vector database provider for Llama Stack. It +allows you to store and query vectors directly within a Milvus database. +That means you're not limited to storing vectors in memory or in a separate service. + +## Features + +- Easy to use +- Fully integrated with Llama Stack + +## Usage + +To use Milvus in your Llama Stack project, follow these steps: + +1. Install the necessary dependencies. +2. Configure your Llama Stack project to use Milvus. +3. Start storing and querying vectors. 
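To put step 3 into practice, the RAG tool APIs in the client are one way to exercise a Milvus-backed vector store end to end. The example below is a sketch under a few assumptions: a running stack whose `vector_io` provider is configured as `milvus` (see the configuration section that follows), and an embedding model such as `all-MiniLM-L6-v2` already registered with the stack.

```python
from llama_stack_client import LlamaStackClient
from llama_stack_client.types import Document

client = LlamaStackClient(base_url="http://localhost:8321")

# Register a vector database backed by the Milvus provider.
vector_db_id = "milvus-demo"
client.vector_dbs.register(
    vector_db_id=vector_db_id,
    provider_id="milvus",                # must match the provider_id in your run.yaml
    embedding_model="all-MiniLM-L6-v2",  # assumed to be registered with the stack
    embedding_dimension=384,
)

# Insert a document, then query it back through the RAG tool runtime.
client.tool_runtime.rag_tool.insert(
    documents=[
        Document(
            document_id="doc-1",
            content="Milvus stores and searches embedding vectors.",
            mime_type="text/plain",
            metadata={},
        )
    ],
    vector_db_id=vector_db_id,
    chunk_size_in_tokens=128,
)
result = client.tool_runtime.rag_tool.query(
    vector_db_ids=[vector_db_id],
    content="What does Milvus do?",
)
print(result)
```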
+
+## Installation
+
+You can install Milvus using pymilvus:
+
+```bash
+pip install pymilvus
+```
+
+## Configuration
+
+In Llama Stack, Milvus can be configured in two ways:
+- **Inline (Local) Configuration** - Uses Milvus-Lite for local storage
+- **Remote Configuration** - Connects to a remote Milvus server
+
+### Inline (Local) Configuration
+
+The simplest method is local configuration, which requires setting `db_path`, a path for locally storing Milvus-Lite files:
+
+```yaml
+vector_io:
+  - provider_id: milvus
+    provider_type: inline::milvus
+    config:
+      db_path: ~/.llama/distributions/together/milvus_store.db
+```
+
+### Remote Configuration
+
+Remote configuration is suitable for larger data storage requirements:
+
+#### Standard Remote Connection
+
+```yaml
+vector_io:
+  - provider_id: milvus
+    provider_type: remote::milvus
+    config:
+      uri: "http://<host>:<port>"
+      token: "<user>:<password>"
+```
+
+#### TLS-Enabled Remote Connection (One-way TLS)
+
+For connections to Milvus instances with one-way TLS enabled:
+
+```yaml
+vector_io:
+  - provider_id: milvus
+    provider_type: remote::milvus
+    config:
+      uri: "https://<host>:<port>"
+      token: "<user>:<password>"
+      secure: True
+      server_pem_path: "/path/to/server.pem"
+```
+
+#### Mutual TLS (mTLS) Remote Connection
+
+For connections to Milvus instances with mutual TLS (mTLS) enabled:
+
+```yaml
+vector_io:
+  - provider_id: milvus
+    provider_type: remote::milvus
+    config:
+      uri: "https://<host>:<port>"
+      token: "<user>:<password>"
+      secure: True
+      ca_pem_path: "/path/to/ca.pem"
+      client_pem_path: "/path/to/client.pem"
+      client_key_path: "/path/to/client.key"
+```
+
+#### Key Parameters for TLS Configuration
+
+- **`secure`**: Enables TLS encryption when set to `true`. Defaults to `false`.
+- **`server_pem_path`**: Path to the **server certificate** for verifying the server’s identity (used in one-way TLS).
+- **`ca_pem_path`**: Path to the **Certificate Authority (CA) certificate** for validating the server certificate (required in mTLS).
+- **`client_pem_path`**: Path to the **client certificate** file (required for mTLS).
+- **`client_key_path`**: Path to the **client private key** file (required for mTLS).
+
+## Documentation
+See the [Milvus documentation](https://milvus.io/docs/install-overview.md) for more details about Milvus in general.
+
+For more details on TLS configuration, refer to the [TLS setup guide](https://milvus.io/docs/tls.md).
diff --git a/docs/source/providers/vector_io/mivus.md b/docs/source/providers/vector_io/mivus.md
deleted file mode 100644
index 8d2f043d5..000000000
--- a/docs/source/providers/vector_io/mivus.md
+++ /dev/null
@@ -1,31 +0,0 @@
----
-orphan: true
----
-# Milvus
-
-[Milvus](https://milvus.io/) is an inline and remote vector database provider for Llama Stack. It
-allows you to store and query vectors directly within a Milvus database.
-That means you're not limited to storing vectors in memory or in a separate service.
-
-## Features
-
-- Easy to use
-- Fully integrated with Llama Stack
-
-## Usage
-
-To use Milvus in your Llama Stack project, follow these steps:
-
-1. Install the necessary dependencies.
-2. Configure your Llama Stack project to use Milvus.
-3. Start storing and querying vectors.
-
-## Installation
-
-You can install Milvus using pymilvus:
-
-```bash
-pip install pymilvus
-```
-## Documentation
-See the [Milvus documentation](https://milvus.io/docs/install-overview.md) for more details about Milvus in general.
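The TLS options documented in the new Milvus page above map directly onto a `pymilvus` connection, which gives you a way to validate certificates outside of Llama Stack. Treat this as a sketch only: the exact keyword arguments are an assumption against the pymilvus version you have installed, and the host, token, and certificate paths are placeholders.

```python
from pymilvus import connections, utility

# Mirrors the mutual-TLS example above; replace the placeholders with real values.
connections.connect(
    alias="default",
    uri="https://<host>:<port>",
    token="<user>:<password>",
    secure=True,
    ca_pem_path="/path/to/ca.pem",
    client_pem_path="/path/to/client.pem",
    client_key_path="/path/to/client.key",
)
print(utility.get_server_version())  # only succeeds if the TLS handshake worked
```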
diff --git a/docs/source/providers/vector_io/sqlite-vec.md b/docs/source/providers/vector_io/sqlite-vec.md index 43d10c751..49ba659f7 100644 --- a/docs/source/providers/vector_io/sqlite-vec.md +++ b/docs/source/providers/vector_io/sqlite-vec.md @@ -66,6 +66,25 @@ To use sqlite-vec in your Llama Stack project, follow these steps: 2. Configure your Llama Stack project to use SQLite-Vec. 3. Start storing and querying vectors. +## Supported Search Modes + +The sqlite-vec provider supports both vector-based and keyword-based (full-text) search modes. + +When using the RAGTool interface, you can specify the desired search behavior via the `mode` parameter in +`RAGQueryConfig`. For example: + +```python +from llama_stack.apis.tool_runtime.rag import RAGQueryConfig + +query_config = RAGQueryConfig(max_chunks=6, mode="vector") + +results = client.tool_runtime.rag_tool.query( + vector_db_ids=[vector_db_id], + content="what is torchtune", + query_config=query_config, +) +``` + ## Installation You can install SQLite-Vec using pip: diff --git a/docs/source/references/llama_stack_client_cli_reference.md b/docs/source/references/llama_stack_client_cli_reference.md index 0b84027f0..cd4dd4cd7 100644 --- a/docs/source/references/llama_stack_client_cli_reference.md +++ b/docs/source/references/llama_stack_client_cli_reference.md @@ -253,8 +253,6 @@ llama-stack-client toolgroups list +---------------------------+------------------+------+---------------+ | identifier | provider_id | args | mcp_endpoint | +===========================+==================+======+===============+ -| builtin::code_interpreter | code-interpreter | None | None | -+---------------------------+------------------+------+---------------+ | builtin::rag | rag-runtime | None | None | +---------------------------+------------------+------+---------------+ | builtin::websearch | tavily-search | None | None | diff --git a/docs/zero_to_hero_guide/00_Inference101.ipynb b/docs/zero_to_hero_guide/00_Inference101.ipynb index b3b781375..4f71f9f89 100644 --- a/docs/zero_to_hero_guide/00_Inference101.ipynb +++ b/docs/zero_to_hero_guide/00_Inference101.ipynb @@ -389,5 +389,7 @@ "pygments_lexer": "ipython3", "version": "3.10.15" } - } + }, + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/docs/zero_to_hero_guide/01_Local_Cloud_Inference101.ipynb b/docs/zero_to_hero_guide/01_Local_Cloud_Inference101.ipynb index d66e1b4f5..19a7fe3be 100644 --- a/docs/zero_to_hero_guide/01_Local_Cloud_Inference101.ipynb +++ b/docs/zero_to_hero_guide/01_Local_Cloud_Inference101.ipynb @@ -256,5 +256,7 @@ "pygments_lexer": "ipython3", "version": "3.10.15" } - } + }, + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/docs/zero_to_hero_guide/02_Prompt_Engineering101.ipynb b/docs/zero_to_hero_guide/02_Prompt_Engineering101.ipynb index 7fccf8c51..f3566eeb3 100644 --- a/docs/zero_to_hero_guide/02_Prompt_Engineering101.ipynb +++ b/docs/zero_to_hero_guide/02_Prompt_Engineering101.ipynb @@ -301,5 +301,7 @@ "pygments_lexer": "ipython3", "version": "3.12.2" } - } + }, + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/docs/zero_to_hero_guide/03_Image_Chat101.ipynb b/docs/zero_to_hero_guide/03_Image_Chat101.ipynb index 58353e813..ae10d8808 100644 --- a/docs/zero_to_hero_guide/03_Image_Chat101.ipynb +++ b/docs/zero_to_hero_guide/03_Image_Chat101.ipynb @@ -200,5 +200,7 @@ "pygments_lexer": "ipython3", "version": "3.12.2" } - } + }, + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/docs/zero_to_hero_guide/04_Tool_Calling101.ipynb 
b/docs/zero_to_hero_guide/04_Tool_Calling101.ipynb index c3a383e8c..de3754b21 100644 --- a/docs/zero_to_hero_guide/04_Tool_Calling101.ipynb +++ b/docs/zero_to_hero_guide/04_Tool_Calling101.ipynb @@ -355,5 +355,7 @@ "pygments_lexer": "ipython3", "version": "3.10.15" } - } + }, + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/docs/zero_to_hero_guide/05_Memory101.ipynb b/docs/zero_to_hero_guide/05_Memory101.ipynb index bfeb40adc..66956259f 100644 --- a/docs/zero_to_hero_guide/05_Memory101.ipynb +++ b/docs/zero_to_hero_guide/05_Memory101.ipynb @@ -398,5 +398,7 @@ "pygments_lexer": "ipython3", "version": "3.10.15" } - } + }, + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/docs/zero_to_hero_guide/06_Safety101.ipynb b/docs/zero_to_hero_guide/06_Safety101.ipynb index c8c1fe9c7..5d7763924 100644 --- a/docs/zero_to_hero_guide/06_Safety101.ipynb +++ b/docs/zero_to_hero_guide/06_Safety101.ipynb @@ -132,5 +132,7 @@ "pygments_lexer": "ipython3", "version": "3.11.10" } - } + }, + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/docs/zero_to_hero_guide/07_Agents101.ipynb b/docs/zero_to_hero_guide/07_Agents101.ipynb index 8c988e1e3..b6df2a4c8 100644 --- a/docs/zero_to_hero_guide/07_Agents101.ipynb +++ b/docs/zero_to_hero_guide/07_Agents101.ipynb @@ -188,5 +188,7 @@ "pygments_lexer": "ipython3", "version": "3.10.15" } - } + }, + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/docs/zero_to_hero_guide/README.md b/docs/zero_to_hero_guide/README.md index 9f756de26..96f9768de 100644 --- a/docs/zero_to_hero_guide/README.md +++ b/docs/zero_to_hero_guide/README.md @@ -86,11 +86,11 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next llama stack build --template ollama --image-type conda ``` **Expected Output:** - ``` + ```bash ... - Build Successful! Next steps: - 1. Set the environment variables: LLAMA_STACK_PORT, OLLAMA_URL, INFERENCE_MODEL, SAFETY_MODEL - 2. `llama stack run /Users//.llama/distributions/llamastack-ollama/ollama-run.yaml + Build Successful! + You can find the newly-built template here: ~/.llama/distributions/ollama/ollama-run.yaml + You can run the new Llama Stack Distro via: llama stack run ~/.llama/distributions/ollama/ollama-run.yaml --image-type conda ``` 3. **Set the ENV variables by exporting them to the terminal**: diff --git a/install.sh b/install.sh new file mode 100755 index 000000000..e424925a6 --- /dev/null +++ b/install.sh @@ -0,0 +1,206 @@ +#!/usr/bin/env bash +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +set -Eeuo pipefail + +PORT=8321 +OLLAMA_PORT=11434 +MODEL_ALIAS="llama3.2:3b" +SERVER_IMAGE="llamastack/distribution-ollama:0.2.2" +WAIT_TIMEOUT=300 + +log(){ printf "\e[1;32m%s\e[0m\n" "$*"; } +die(){ printf "\e[1;31m❌ %s\e[0m\n" "$*" >&2; exit 1; } + +wait_for_service() { + local url="$1" + local pattern="$2" + local timeout="$3" + local name="$4" + local start ts + log "⏳ Waiting for ${name}…" + start=$(date +%s) + while true; do + if curl --retry 5 --retry-delay 1 --retry-max-time "$timeout" --retry-all-errors --silent --fail "$url" 2>/dev/null | grep -q "$pattern"; then + break + fi + ts=$(date +%s) + if (( ts - start >= timeout )); then + return 1 + fi + printf '.' + sleep 1 + done + return 0 +} + +usage() { + cat << EOF +📚 Llama-Stack Deployment Script + +Description: + This script sets up and deploys Llama-Stack with Ollama integration in containers. 
+ It handles both Docker and Podman runtimes and includes automatic platform detection. + +Usage: + $(basename "$0") [OPTIONS] + +Options: + -p, --port PORT Server port for Llama-Stack (default: ${PORT}) + -o, --ollama-port PORT Ollama service port (default: ${OLLAMA_PORT}) + -m, --model MODEL Model alias to use (default: ${MODEL_ALIAS}) + -i, --image IMAGE Server image (default: ${SERVER_IMAGE}) + -t, --timeout SECONDS Service wait timeout in seconds (default: ${WAIT_TIMEOUT}) + -h, --help Show this help message + +For more information: + Documentation: https://llama-stack.readthedocs.io/ + GitHub: https://github.com/meta-llama/llama-stack + +Report issues: + https://github.com/meta-llama/llama-stack/issues +EOF +} + +# Parse command line arguments +while [[ $# -gt 0 ]]; do + case $1 in + -h|--help) + usage + exit 0 + ;; + -p|--port) + PORT="$2" + shift 2 + ;; + -o|--ollama-port) + OLLAMA_PORT="$2" + shift 2 + ;; + -m|--model) + MODEL_ALIAS="$2" + shift 2 + ;; + -i|--image) + SERVER_IMAGE="$2" + shift 2 + ;; + -t|--timeout) + WAIT_TIMEOUT="$2" + shift 2 + ;; + *) + die "Unknown option: $1" + ;; + esac +done + +if command -v docker &> /dev/null; then + ENGINE="docker" +elif command -v podman &> /dev/null; then + ENGINE="podman" +else + die "Docker or Podman is required. Install Docker: https://docs.docker.com/get-docker/ or Podman: https://podman.io/getting-started/installation" +fi + +# Explicitly set the platform for the host architecture +HOST_ARCH="$(uname -m)" +if [ "$HOST_ARCH" = "arm64" ]; then + if [ "$ENGINE" = "docker" ]; then + PLATFORM_OPTS=( --platform linux/amd64 ) + else + PLATFORM_OPTS=( --os linux --arch amd64 ) + fi +else + PLATFORM_OPTS=() +fi + +# macOS + Podman: ensure VM is running before we try to launch containers +# If you need GPU passthrough under Podman on macOS, init the VM with libkrun: +# CONTAINERS_MACHINE_PROVIDER=libkrun podman machine init +if [ "$ENGINE" = "podman" ] && [ "$(uname -s)" = "Darwin" ]; then + if ! podman info &>/dev/null; then + log "⌛️ Initializing Podman VM…" + podman machine init &>/dev/null || true + podman machine start &>/dev/null || true + + log "⌛️ Waiting for Podman API…" + until podman info &>/dev/null; do + sleep 1 + done + log "✅ Podman VM is up" + fi +fi + +# Clean up any leftovers from earlier runs +for name in ollama-server llama-stack; do + ids=$($ENGINE ps -aq --filter "name=^${name}$") + if [ -n "$ids" ]; then + log "⚠️ Found existing container(s) for '${name}', removing…" + $ENGINE rm -f "$ids" > /dev/null 2>&1 + fi +done + +############################################################################### +# 0. Create a shared network +############################################################################### +if ! $ENGINE network inspect llama-net >/dev/null 2>&1; then + log "🌐 Creating network…" + $ENGINE network create llama-net >/dev/null 2>&1 +fi + +############################################################################### +# 1. Ollama +############################################################################### +log "🦙 Starting Ollama…" +$ENGINE run -d "${PLATFORM_OPTS[@]}" --name ollama-server \ + --network llama-net \ + -p "${OLLAMA_PORT}:${OLLAMA_PORT}" \ + ollama/ollama > /dev/null 2>&1 + +if ! 
wait_for_service "http://localhost:${OLLAMA_PORT}/" "Ollama" "$WAIT_TIMEOUT" "Ollama daemon"; then + log "❌ Ollama daemon did not become ready in ${WAIT_TIMEOUT}s; dumping container logs:" + $ENGINE logs --tail 200 ollama-server + die "Ollama startup failed" +fi + +log "📦 Ensuring model is pulled: ${MODEL_ALIAS}…" +if ! $ENGINE exec ollama-server ollama pull "${MODEL_ALIAS}" > /dev/null 2>&1; then + log "❌ Failed to pull model ${MODEL_ALIAS}; dumping container logs:" + $ENGINE logs --tail 200 ollama-server + die "Model pull failed" +fi + +############################################################################### +# 2. Llama‑Stack +############################################################################### +cmd=( run -d "${PLATFORM_OPTS[@]}" --name llama-stack \ + --network llama-net \ + -p "${PORT}:${PORT}" \ + "${SERVER_IMAGE}" --port "${PORT}" \ + --env INFERENCE_MODEL="${MODEL_ALIAS}" \ + --env OLLAMA_URL="http://ollama-server:${OLLAMA_PORT}" ) + +log "🦙 Starting Llama‑Stack…" +$ENGINE "${cmd[@]}" > /dev/null 2>&1 + +if ! wait_for_service "http://127.0.0.1:${PORT}/v1/health" "OK" "$WAIT_TIMEOUT" "Llama-Stack API"; then + log "❌ Llama-Stack did not become ready in ${WAIT_TIMEOUT}s; dumping container logs:" + $ENGINE logs --tail 200 llama-stack + die "Llama-Stack startup failed" +fi + +############################################################################### +# Done +############################################################################### +log "" +log "🎉 Llama‑Stack is ready!" +log "👉 API endpoint: http://localhost:${PORT}" +log "📖 Documentation: https://llama-stack.readthedocs.io/en/latest/references/index.html" +log "💻 To access the llama‑stack CLI, exec into the container:" +log " $ENGINE exec -ti llama-stack bash" +log "" diff --git a/kvant_build_local.sh b/kvant_build_local.sh new file mode 100755 index 000000000..9701c57dc --- /dev/null +++ b/kvant_build_local.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash + +export USE_COPY_NOT_MOUNT=true +export LLAMA_STACK_DIR=. + +uvx --from . llama stack build --template kvant --image-type container --image-name kvant diff --git a/kvant_start_local.sh b/kvant_start_local.sh new file mode 100755 index 000000000..db5bff84a --- /dev/null +++ b/kvant_start_local.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + +export LLAMA_STACK_PORT=8321 +# VLLM_API_TOKEN= env file +# KEYCLOAK_CLIENT_SECRET= env file + + +docker run -it \ + -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ + -v $(pwd)/data:/root/.llama \ + --mount type=bind,source="$(pwd)"/llama_stack/templates/kvant/run.yaml,target=/root/.llama/config.yaml,readonly \ + --entrypoint python \ + --env-file ./.env \ + distribution-kvant:dev \ + -m llama_stack.distribution.server.server --config /root/.llama/config.yaml \ + --port $LLAMA_STACK_PORT \ + diff --git a/llama_stack/apis/agents/agents.py b/llama_stack/apis/agents/agents.py index e13c4960b..b79c512b8 100644 --- a/llama_stack/apis/agents/agents.py +++ b/llama_stack/apis/agents/agents.py @@ -4,24 +4,16 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
+import sys +from collections.abc import AsyncIterator from datetime import datetime from enum import Enum -from typing import ( - Annotated, - Any, - AsyncIterator, - Dict, - List, - Literal, - Optional, - Protocol, - Union, - runtime_checkable, -) +from typing import Annotated, Any, Literal, Protocol, runtime_checkable from pydantic import BaseModel, ConfigDict, Field from llama_stack.apis.common.content_types import URL, ContentDelta, InterleavedContent +from llama_stack.apis.common.responses import Order, PaginatedResponse from llama_stack.apis.inference import ( CompletionMessage, ResponseFormat, @@ -38,6 +30,23 @@ from llama_stack.apis.safety import SafetyViolation from llama_stack.apis.tools import ToolDef from llama_stack.schema_utils import json_schema_type, register_schema, webmethod +from .openai_responses import ( + ListOpenAIResponseInputItem, + ListOpenAIResponseObject, + OpenAIResponseInput, + OpenAIResponseInputTool, + OpenAIResponseObject, + OpenAIResponseObjectStream, +) + +# TODO: use enum.StrEnum when we drop support for python 3.10 +if sys.version_info >= (3, 11): + from enum import StrEnum +else: + + class StrEnum(str, Enum): + """Backport of StrEnum for Python 3.10 and below.""" + class Attachment(BaseModel): """An attachment to an agent turn. @@ -72,11 +81,11 @@ class StepCommon(BaseModel): turn_id: str step_id: str - started_at: Optional[datetime] = None - completed_at: Optional[datetime] = None + started_at: datetime | None = None + completed_at: datetime | None = None -class StepType(Enum): +class StepType(StrEnum): """Type of the step in an agent turn. :cvar inference: The step is an inference step that calls an LLM. @@ -100,7 +109,7 @@ class InferenceStep(StepCommon): model_config = ConfigDict(protected_namespaces=()) - step_type: Literal[StepType.inference.value] = StepType.inference.value + step_type: Literal[StepType.inference] = StepType.inference model_response: CompletionMessage @@ -112,9 +121,9 @@ class ToolExecutionStep(StepCommon): :param tool_responses: The tool responses from the tool calls. """ - step_type: Literal[StepType.tool_execution.value] = StepType.tool_execution.value - tool_calls: List[ToolCall] - tool_responses: List[ToolResponse] + step_type: Literal[StepType.tool_execution] = StepType.tool_execution + tool_calls: list[ToolCall] + tool_responses: list[ToolResponse] @json_schema_type @@ -124,8 +133,8 @@ class ShieldCallStep(StepCommon): :param violation: The violation from the shield call. """ - step_type: Literal[StepType.shield_call.value] = StepType.shield_call.value - violation: Optional[SafetyViolation] + step_type: Literal[StepType.shield_call] = StepType.shield_call + violation: SafetyViolation | None @json_schema_type @@ -136,19 +145,14 @@ class MemoryRetrievalStep(StepCommon): :param inserted_context: The context retrieved from the vector databases. """ - step_type: Literal[StepType.memory_retrieval.value] = StepType.memory_retrieval.value + step_type: Literal[StepType.memory_retrieval] = StepType.memory_retrieval # TODO: should this be List[str]? 
vector_db_ids: str inserted_context: InterleavedContent Step = Annotated[ - Union[ - InferenceStep, - ToolExecutionStep, - ShieldCallStep, - MemoryRetrievalStep, - ], + InferenceStep | ToolExecutionStep | ShieldCallStep | MemoryRetrievalStep, Field(discriminator="step_type"), ] @@ -159,18 +163,13 @@ class Turn(BaseModel): turn_id: str session_id: str - input_messages: List[ - Union[ - UserMessage, - ToolResponseMessage, - ] - ] - steps: List[Step] + input_messages: list[UserMessage | ToolResponseMessage] + steps: list[Step] output_message: CompletionMessage - output_attachments: Optional[List[Attachment]] = Field(default_factory=list) + output_attachments: list[Attachment] | None = Field(default_factory=lambda: []) started_at: datetime - completed_at: Optional[datetime] = None + completed_at: datetime | None = None @json_schema_type @@ -179,34 +178,31 @@ class Session(BaseModel): session_id: str session_name: str - turns: List[Turn] + turns: list[Turn] started_at: datetime class AgentToolGroupWithArgs(BaseModel): name: str - args: Dict[str, Any] + args: dict[str, Any] -AgentToolGroup = Union[ - str, - AgentToolGroupWithArgs, -] +AgentToolGroup = str | AgentToolGroupWithArgs register_schema(AgentToolGroup, name="AgentTool") class AgentConfigCommon(BaseModel): - sampling_params: Optional[SamplingParams] = Field(default_factory=SamplingParams) + sampling_params: SamplingParams | None = Field(default_factory=SamplingParams) - input_shields: Optional[List[str]] = Field(default_factory=list) - output_shields: Optional[List[str]] = Field(default_factory=list) - toolgroups: Optional[List[AgentToolGroup]] = Field(default_factory=list) - client_tools: Optional[List[ToolDef]] = Field(default_factory=list) - tool_choice: Optional[ToolChoice] = Field(default=None, deprecated="use tool_config instead") - tool_prompt_format: Optional[ToolPromptFormat] = Field(default=None, deprecated="use tool_config instead") - tool_config: Optional[ToolConfig] = Field(default=None) + input_shields: list[str] | None = Field(default_factory=lambda: []) + output_shields: list[str] | None = Field(default_factory=lambda: []) + toolgroups: list[AgentToolGroup] | None = Field(default_factory=lambda: []) + client_tools: list[ToolDef] | None = Field(default_factory=lambda: []) + tool_choice: ToolChoice | None = Field(default=None, deprecated="use tool_config instead") + tool_prompt_format: ToolPromptFormat | None = Field(default=None, deprecated="use tool_config instead") + tool_config: ToolConfig | None = Field(default=None) - max_infer_iters: Optional[int] = 10 + max_infer_iters: int | None = 10 def model_post_init(self, __context): if self.tool_config: @@ -225,10 +221,20 @@ class AgentConfigCommon(BaseModel): @json_schema_type class AgentConfig(AgentConfigCommon): + """Configuration for an agent. 
+ + :param model: The model identifier to use for the agent + :param instructions: The system instructions for the agent + :param name: Optional name for the agent, used in telemetry and identification + :param enable_session_persistence: Optional flag indicating whether session data has to be persisted + :param response_format: Optional response format configuration + """ + model: str instructions: str - enable_session_persistence: Optional[bool] = False - response_format: Optional[ResponseFormat] = None + name: str | None = None + enable_session_persistence: bool | None = False + response_format: ResponseFormat | None = None @json_schema_type @@ -238,21 +244,11 @@ class Agent(BaseModel): created_at: datetime -@json_schema_type -class ListAgentsResponse(BaseModel): - data: List[Agent] - - -@json_schema_type -class ListAgentSessionsResponse(BaseModel): - data: List[Session] - - class AgentConfigOverridablePerTurn(AgentConfigCommon): - instructions: Optional[str] = None + instructions: str | None = None -class AgentTurnResponseEventType(Enum): +class AgentTurnResponseEventType(StrEnum): step_start = "step_start" step_complete = "step_complete" step_progress = "step_progress" @@ -264,15 +260,15 @@ class AgentTurnResponseEventType(Enum): @json_schema_type class AgentTurnResponseStepStartPayload(BaseModel): - event_type: Literal[AgentTurnResponseEventType.step_start.value] = AgentTurnResponseEventType.step_start.value + event_type: Literal[AgentTurnResponseEventType.step_start] = AgentTurnResponseEventType.step_start step_type: StepType step_id: str - metadata: Optional[Dict[str, Any]] = Field(default_factory=dict) + metadata: dict[str, Any] | None = Field(default_factory=lambda: {}) @json_schema_type class AgentTurnResponseStepCompletePayload(BaseModel): - event_type: Literal[AgentTurnResponseEventType.step_complete.value] = AgentTurnResponseEventType.step_complete.value + event_type: Literal[AgentTurnResponseEventType.step_complete] = AgentTurnResponseEventType.step_complete step_type: StepType step_id: str step_details: Step @@ -282,7 +278,7 @@ class AgentTurnResponseStepCompletePayload(BaseModel): class AgentTurnResponseStepProgressPayload(BaseModel): model_config = ConfigDict(protected_namespaces=()) - event_type: Literal[AgentTurnResponseEventType.step_progress.value] = AgentTurnResponseEventType.step_progress.value + event_type: Literal[AgentTurnResponseEventType.step_progress] = AgentTurnResponseEventType.step_progress step_type: StepType step_id: str @@ -291,33 +287,29 @@ class AgentTurnResponseStepProgressPayload(BaseModel): @json_schema_type class AgentTurnResponseTurnStartPayload(BaseModel): - event_type: Literal[AgentTurnResponseEventType.turn_start.value] = AgentTurnResponseEventType.turn_start.value + event_type: Literal[AgentTurnResponseEventType.turn_start] = AgentTurnResponseEventType.turn_start turn_id: str @json_schema_type class AgentTurnResponseTurnCompletePayload(BaseModel): - event_type: Literal[AgentTurnResponseEventType.turn_complete.value] = AgentTurnResponseEventType.turn_complete.value + event_type: Literal[AgentTurnResponseEventType.turn_complete] = AgentTurnResponseEventType.turn_complete turn: Turn @json_schema_type class AgentTurnResponseTurnAwaitingInputPayload(BaseModel): - event_type: Literal[AgentTurnResponseEventType.turn_awaiting_input.value] = ( - AgentTurnResponseEventType.turn_awaiting_input.value - ) + event_type: Literal[AgentTurnResponseEventType.turn_awaiting_input] = AgentTurnResponseEventType.turn_awaiting_input turn: Turn 
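The hunks above migrate the agent event payloads from `Literal[Enum.member.value]` to `Literal[Enum.member]` backed by a `StrEnum` (with a Python 3.10 backport). Below is a minimal, editorial sketch of how that pattern behaves under pydantic's discriminated unions; it is not part of the patch, and the `EventType`, `StepStart`, and `StepComplete` names are illustrative only.

```python
import sys
from enum import Enum
from typing import Annotated, Literal

from pydantic import BaseModel, Field, TypeAdapter

# Same backport shape as in the patch: enum.StrEnum on 3.11+, str+Enum otherwise.
if sys.version_info >= (3, 11):
    from enum import StrEnum
else:

    class StrEnum(str, Enum):
        """Backport of StrEnum for Python 3.10 and below."""


class EventType(StrEnum):
    step_start = "step_start"
    step_complete = "step_complete"


class StepStart(BaseModel):
    # Literal[EventType.step_start] accepts (and serializes to) the plain string,
    # because StrEnum members are str subclasses.
    event_type: Literal[EventType.step_start] = EventType.step_start
    step_id: str


class StepComplete(BaseModel):
    event_type: Literal[EventType.step_complete] = EventType.step_complete
    step_id: str


Event = Annotated[StepStart | StepComplete, Field(discriminator="event_type")]

# pydantic routes the dict to the right model from the "event_type" string alone.
event = TypeAdapter(Event).validate_python({"event_type": "step_start", "step_id": "s1"})
assert isinstance(event, StepStart)
```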
AgentTurnResponseEventPayload = Annotated[ - Union[ - AgentTurnResponseStepStartPayload, - AgentTurnResponseStepProgressPayload, - AgentTurnResponseStepCompletePayload, - AgentTurnResponseTurnStartPayload, - AgentTurnResponseTurnCompletePayload, - AgentTurnResponseTurnAwaitingInputPayload, - ], + AgentTurnResponseStepStartPayload + | AgentTurnResponseStepProgressPayload + | AgentTurnResponseStepCompletePayload + | AgentTurnResponseTurnStartPayload + | AgentTurnResponseTurnCompletePayload + | AgentTurnResponseTurnAwaitingInputPayload, Field(discriminator="event_type"), ] register_schema(AgentTurnResponseEventPayload, name="AgentTurnResponseEventPayload") @@ -346,18 +338,13 @@ class AgentTurnCreateRequest(AgentConfigOverridablePerTurn): # TODO: figure out how we can simplify this and make why # ToolResponseMessage needs to be here (it is function call # execution from outside the system) - messages: List[ - Union[ - UserMessage, - ToolResponseMessage, - ] - ] + messages: list[UserMessage | ToolResponseMessage] - documents: Optional[List[Document]] = None - toolgroups: Optional[List[AgentToolGroup]] = None + documents: list[Document] | None = None + toolgroups: list[AgentToolGroup] | None = Field(default_factory=lambda: []) - stream: Optional[bool] = False - tool_config: Optional[ToolConfig] = None + stream: bool | None = False + tool_config: ToolConfig | None = None @json_schema_type @@ -365,8 +352,8 @@ class AgentTurnResumeRequest(BaseModel): agent_id: str session_id: str turn_id: str - tool_responses: List[ToolResponse] - stream: Optional[bool] = False + tool_responses: list[ToolResponse] + stream: bool | None = False @json_schema_type @@ -412,17 +399,12 @@ class Agents(Protocol): self, agent_id: str, session_id: str, - messages: List[ - Union[ - UserMessage, - ToolResponseMessage, - ] - ], - stream: Optional[bool] = False, - documents: Optional[List[Document]] = None, - toolgroups: Optional[List[AgentToolGroup]] = None, - tool_config: Optional[ToolConfig] = None, - ) -> Union[Turn, AsyncIterator[AgentTurnResponseStreamChunk]]: + messages: list[UserMessage | ToolResponseMessage], + stream: bool | None = False, + documents: list[Document] | None = None, + toolgroups: list[AgentToolGroup] | None = None, + tool_config: ToolConfig | None = None, + ) -> Turn | AsyncIterator[AgentTurnResponseStreamChunk]: """Create a new turn for an agent. :param agent_id: The ID of the agent to create the turn for. @@ -433,8 +415,9 @@ class Agents(Protocol): :param toolgroups: (Optional) List of toolgroups to create the turn with, will be used in addition to the agent's config toolgroups for the request. :param tool_config: (Optional) The tool configuration to create the turn with, will be used to override the agent's tool_config. :returns: If stream=False, returns a Turn object. - If stream=True, returns an SSE event stream of AgentTurnResponseStreamChunk + If stream=True, returns an SSE event stream of AgentTurnResponseStreamChunk. """ + ... @webmethod( route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume", @@ -446,9 +429,9 @@ class Agents(Protocol): agent_id: str, session_id: str, turn_id: str, - tool_responses: List[ToolResponse], - stream: Optional[bool] = False, - ) -> Union[Turn, AsyncIterator[AgentTurnResponseStreamChunk]]: + tool_responses: list[ToolResponse], + stream: bool | None = False, + ) -> Turn | AsyncIterator[AgentTurnResponseStreamChunk]: """Resume an agent turn with executed tool call responses. 
When a Turn has the status `awaiting_input` due to pending input from client side tool calls, this endpoint can be used to submit the outputs from the tool calls once they are ready. @@ -521,13 +504,14 @@ class Agents(Protocol): self, session_id: str, agent_id: str, - turn_ids: Optional[List[str]] = None, + turn_ids: list[str] | None = None, ) -> Session: """Retrieve an agent session by its ID. :param session_id: The ID of the session to get. :param agent_id: The ID of the agent to get the session for. :param turn_ids: (Optional) List of turn IDs to filter the session by. + :returns: A Session. """ ... @@ -537,7 +521,7 @@ class Agents(Protocol): session_id: str, agent_id: str, ) -> None: - """Delete an agent session by its ID. + """Delete an agent session by its ID and its associated turns. :param session_id: The ID of the session to delete. :param agent_id: The ID of the agent to delete the session for. @@ -549,17 +533,19 @@ class Agents(Protocol): self, agent_id: str, ) -> None: - """Delete an agent by its ID. + """Delete an agent by its ID and its associated sessions and turns. :param agent_id: The ID of the agent to delete. """ ... @webmethod(route="/agents", method="GET") - async def list_agents(self) -> ListAgentsResponse: + async def list_agents(self, start_index: int | None = None, limit: int | None = None) -> PaginatedResponse: """List all agents. - :returns: A ListAgentsResponse. + :param start_index: The index to start the pagination from. + :param limit: The number of agents to return. + :returns: A PaginatedResponse. """ ... @@ -576,10 +562,94 @@ class Agents(Protocol): async def list_agent_sessions( self, agent_id: str, - ) -> ListAgentSessionsResponse: + start_index: int | None = None, + limit: int | None = None, + ) -> PaginatedResponse: """List all session(s) of a given agent. :param agent_id: The ID of the agent to list sessions for. - :returns: A ListAgentSessionsResponse. + :param start_index: The index to start the pagination from. + :param limit: The number of sessions to return. + :returns: A PaginatedResponse. + """ + ... + + # We situate the OpenAI Responses API in the Agents API just like we did things + # for Inference. The Responses API, in its intent, serves the same purpose as + # the Agents API above -- it is essentially a lightweight "agentic loop" with + # integrated tool calling. + # + # Both of these APIs are inherently stateful. + + @webmethod(route="/openai/v1/responses/{response_id}", method="GET") + async def get_openai_response( + self, + response_id: str, + ) -> OpenAIResponseObject: + """Retrieve an OpenAI response by its ID. + + :param response_id: The ID of the OpenAI response to retrieve. + :returns: An OpenAIResponseObject. + """ + ... + + @webmethod(route="/openai/v1/responses", method="POST") + async def create_openai_response( + self, + input: str | list[OpenAIResponseInput], + model: str, + instructions: str | None = None, + previous_response_id: str | None = None, + store: bool | None = True, + stream: bool | None = False, + temperature: float | None = None, + tools: list[OpenAIResponseInputTool] | None = None, + ) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]: + """Create a new OpenAI response. + + :param input: Input message(s) to create the response. + :param model: The underlying LLM used for completions. + :param previous_response_id: (Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses. 
+ :returns: An OpenAIResponseObject. + """ + ... + + @webmethod(route="/openai/v1/responses", method="GET") + async def list_openai_responses( + self, + after: str | None = None, + limit: int | None = 50, + model: str | None = None, + order: Order | None = Order.desc, + ) -> ListOpenAIResponseObject: + """List all OpenAI responses. + + :param after: The ID of the last response to return. + :param limit: The number of responses to return. + :param model: The model to filter responses by. + :param order: The order to sort responses by when sorted by created_at ('asc' or 'desc'). + :returns: A ListOpenAIResponseObject. + """ + ... + + @webmethod(route="/openai/v1/responses/{response_id}/input_items", method="GET") + async def list_openai_response_input_items( + self, + response_id: str, + after: str | None = None, + before: str | None = None, + include: list[str] | None = None, + limit: int | None = 20, + order: Order | None = Order.desc, + ) -> ListOpenAIResponseInputItem: + """List input items for a given OpenAI response. + + :param response_id: The ID of the response to retrieve input items for. + :param after: An item ID to list items after, used for pagination. + :param before: An item ID to list items before, used for pagination. + :param include: Additional fields to include in the response. + :param limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20. + :param order: The order to return the input items in. Default is desc. + :returns: An ListOpenAIResponseInputItem. """ ... diff --git a/llama_stack/apis/agents/openai_responses.py b/llama_stack/apis/agents/openai_responses.py new file mode 100644 index 000000000..6806e1d3f --- /dev/null +++ b/llama_stack/apis/agents/openai_responses.py @@ -0,0 +1,279 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Annotated, Any, Literal + +from pydantic import BaseModel, Field + +from llama_stack.schema_utils import json_schema_type, register_schema + +# NOTE(ashwin): this file is literally a copy of the OpenAI responses API schema. We should probably +# take their YAML and generate this file automatically. Their YAML is available. 
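Since the Responses methods added to the `Agents` protocol above are new, here is a minimal editorial usage sketch (not part of the patch). `agents_impl` stands in for any object implementing `Agents`, and the model id is a placeholder; with `stream=False`, `create_openai_response` returns the `OpenAIResponseObject` defined in this file rather than an async iterator.

```python
from llama_stack.apis.agents.agents import Agents


async def responses_demo(agents_impl: Agents) -> None:
    # stream=False returns a single OpenAIResponseObject (not an async iterator).
    response = await agents_impl.create_openai_response(
        input="What is the capital of France?",
        model="llama3.2:3b",  # placeholder model id
        instructions="Answer in one short sentence.",
        store=True,
        stream=False,
    )
    print(response.id, response.status)

    # Fork a follow-up off the stored response via previous_response_id.
    follow_up = await agents_impl.create_openai_response(
        input="And what is its population?",
        model="llama3.2:3b",
        previous_response_id=response.id,
    )

    # Retrieve and list stored responses, then page through the inputs of one.
    fetched = await agents_impl.get_openai_response(response_id=follow_up.id)
    listing = await agents_impl.list_openai_responses(limit=10)
    items = await agents_impl.list_openai_response_input_items(response_id=fetched.id)
    print(len(listing.data), len(items.data))
```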
+ + +@json_schema_type +class OpenAIResponseError(BaseModel): + code: str + message: str + + +@json_schema_type +class OpenAIResponseInputMessageContentText(BaseModel): + text: str + type: Literal["input_text"] = "input_text" + + +@json_schema_type +class OpenAIResponseInputMessageContentImage(BaseModel): + detail: Literal["low"] | Literal["high"] | Literal["auto"] = "auto" + type: Literal["input_image"] = "input_image" + # TODO: handle file_id + image_url: str | None = None + + +# TODO: handle file content types +OpenAIResponseInputMessageContent = Annotated[ + OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage, + Field(discriminator="type"), +] +register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMessageContent") + + +@json_schema_type +class OpenAIResponseOutputMessageContentOutputText(BaseModel): + text: str + type: Literal["output_text"] = "output_text" + + +OpenAIResponseOutputMessageContent = Annotated[ + OpenAIResponseOutputMessageContentOutputText, + Field(discriminator="type"), +] +register_schema(OpenAIResponseOutputMessageContent, name="OpenAIResponseOutputMessageContent") + + +@json_schema_type +class OpenAIResponseMessage(BaseModel): + """ + Corresponds to the various Message types in the Responses API. + They are all under one type because the Responses API gives them all + the same "type" value, and there is no way to tell them apart in certain + scenarios. + """ + + content: str | list[OpenAIResponseInputMessageContent] | list[OpenAIResponseOutputMessageContent] + role: Literal["system"] | Literal["developer"] | Literal["user"] | Literal["assistant"] + type: Literal["message"] = "message" + + # The fields below are not used in all scenarios, but are required in others. + id: str | None = None + status: str | None = None + + +@json_schema_type +class OpenAIResponseOutputMessageWebSearchToolCall(BaseModel): + id: str + status: str + type: Literal["web_search_call"] = "web_search_call" + + +@json_schema_type +class OpenAIResponseOutputMessageFunctionToolCall(BaseModel): + call_id: str + name: str + arguments: str + type: Literal["function_call"] = "function_call" + id: str | None = None + status: str | None = None + + +@json_schema_type +class OpenAIResponseOutputMessageMCPCall(BaseModel): + id: str + type: Literal["mcp_call"] = "mcp_call" + arguments: str + name: str + server_label: str + error: str | None = None + output: str | None = None + + +class MCPListToolsTool(BaseModel): + input_schema: dict[str, Any] + name: str + description: str | None = None + + +@json_schema_type +class OpenAIResponseOutputMessageMCPListTools(BaseModel): + id: str + type: Literal["mcp_list_tools"] = "mcp_list_tools" + server_label: str + tools: list[MCPListToolsTool] + + +OpenAIResponseOutput = Annotated[ + OpenAIResponseMessage + | OpenAIResponseOutputMessageWebSearchToolCall + | OpenAIResponseOutputMessageFunctionToolCall + | OpenAIResponseOutputMessageMCPCall + | OpenAIResponseOutputMessageMCPListTools, + Field(discriminator="type"), +] +register_schema(OpenAIResponseOutput, name="OpenAIResponseOutput") + + +@json_schema_type +class OpenAIResponseObject(BaseModel): + created_at: int + error: OpenAIResponseError | None = None + id: str + model: str + object: Literal["response"] = "response" + output: list[OpenAIResponseOutput] + parallel_tool_calls: bool = False + previous_response_id: str | None = None + status: str + temperature: float | None = None + top_p: float | None = None + truncation: str | None = None + user: str | None = None + + 
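A short editorial sketch of how the `OpenAIResponseOutput` union above resolves raw payloads; the example dict is illustrative, and the import path simply mirrors the module introduced by this file.

```python
from pydantic import TypeAdapter

from llama_stack.apis.agents.openai_responses import (
    OpenAIResponseMessage,
    OpenAIResponseOutput,
)

raw = {
    "type": "message",
    "role": "assistant",
    "status": "completed",
    "content": [{"type": "output_text", "text": "Hello!"}],
}

# Field(discriminator="type") picks OpenAIResponseMessage for type == "message".
output = TypeAdapter(OpenAIResponseOutput).validate_python(raw)
assert isinstance(output, OpenAIResponseMessage)
assert output.content[0].text == "Hello!"
```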
+@json_schema_type +class OpenAIResponseObjectStreamResponseCreated(BaseModel): + response: OpenAIResponseObject + type: Literal["response.created"] = "response.created" + + +@json_schema_type +class OpenAIResponseObjectStreamResponseOutputTextDelta(BaseModel): + content_index: int + delta: str + item_id: str + output_index: int + sequence_number: int + type: Literal["response.output_text.delta"] = "response.output_text.delta" + + +@json_schema_type +class OpenAIResponseObjectStreamResponseCompleted(BaseModel): + response: OpenAIResponseObject + type: Literal["response.completed"] = "response.completed" + + +OpenAIResponseObjectStream = Annotated[ + OpenAIResponseObjectStreamResponseCreated + | OpenAIResponseObjectStreamResponseOutputTextDelta + | OpenAIResponseObjectStreamResponseCompleted, + Field(discriminator="type"), +] +register_schema(OpenAIResponseObjectStream, name="OpenAIResponseObjectStream") + + +@json_schema_type +class OpenAIResponseInputFunctionToolCallOutput(BaseModel): + """ + This represents the output of a function call that gets passed back to the model. + """ + + call_id: str + output: str + type: Literal["function_call_output"] = "function_call_output" + id: str | None = None + status: str | None = None + + +OpenAIResponseInput = Annotated[ + # Responses API allows output messages to be passed in as input + OpenAIResponseOutputMessageWebSearchToolCall + | OpenAIResponseOutputMessageFunctionToolCall + | OpenAIResponseInputFunctionToolCallOutput + | + # Fallback to the generic message type as a last resort + OpenAIResponseMessage, + Field(union_mode="left_to_right"), +] +register_schema(OpenAIResponseInput, name="OpenAIResponseInput") + + +@json_schema_type +class OpenAIResponseInputToolWebSearch(BaseModel): + type: Literal["web_search"] | Literal["web_search_preview_2025_03_11"] = "web_search" + # TODO: actually use search_context_size somewhere... 
+ search_context_size: str | None = Field(default="medium", pattern="^low|medium|high$") + # TODO: add user_location + + +@json_schema_type +class OpenAIResponseInputToolFunction(BaseModel): + type: Literal["function"] = "function" + name: str + description: str | None = None + parameters: dict[str, Any] | None + strict: bool | None = None + + +class FileSearchRankingOptions(BaseModel): + ranker: str | None = None + score_threshold: float | None = Field(default=0.0, ge=0.0, le=1.0) + + +@json_schema_type +class OpenAIResponseInputToolFileSearch(BaseModel): + type: Literal["file_search"] = "file_search" + vector_store_id: list[str] + ranking_options: FileSearchRankingOptions | None = None + # TODO: add filters + + +class ApprovalFilter(BaseModel): + always: list[str] | None = None + never: list[str] | None = None + + +class AllowedToolsFilter(BaseModel): + tool_names: list[str] | None = None + + +@json_schema_type +class OpenAIResponseInputToolMCP(BaseModel): + type: Literal["mcp"] = "mcp" + server_label: str + server_url: str + headers: dict[str, Any] | None = None + + require_approval: Literal["always"] | Literal["never"] | ApprovalFilter = "never" + allowed_tools: list[str] | AllowedToolsFilter | None = None + + +OpenAIResponseInputTool = Annotated[ + OpenAIResponseInputToolWebSearch + | OpenAIResponseInputToolFileSearch + | OpenAIResponseInputToolFunction + | OpenAIResponseInputToolMCP, + Field(discriminator="type"), +] +register_schema(OpenAIResponseInputTool, name="OpenAIResponseInputTool") + + +class ListOpenAIResponseInputItem(BaseModel): + data: list[OpenAIResponseInput] + object: Literal["list"] = "list" + + +@json_schema_type +class OpenAIResponseObjectWithInput(OpenAIResponseObject): + input: list[OpenAIResponseInput] + + +@json_schema_type +class ListOpenAIResponseObject(BaseModel): + data: list[OpenAIResponseObjectWithInput] + has_more: bool + first_id: str + last_id: str + object: Literal["list"] = "list" diff --git a/llama_stack/apis/batch_inference/batch_inference.py b/llama_stack/apis/batch_inference/batch_inference.py index 7a324128d..b2aa637e2 100644 --- a/llama_stack/apis/batch_inference/batch_inference.py +++ b/llama_stack/apis/batch_inference/batch_inference.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import List, Optional, Protocol, runtime_checkable +from typing import Protocol, runtime_checkable from llama_stack.apis.common.job_types import Job from llama_stack.apis.inference import ( @@ -34,22 +34,45 @@ class BatchInference(Protocol): async def completion( self, model: str, - content_batch: List[InterleavedContent], - sampling_params: Optional[SamplingParams] = None, - response_format: Optional[ResponseFormat] = None, - logprobs: Optional[LogProbConfig] = None, - ) -> Job: ... + content_batch: list[InterleavedContent], + sampling_params: SamplingParams | None = None, + response_format: ResponseFormat | None = None, + logprobs: LogProbConfig | None = None, + ) -> Job: + """Generate completions for a batch of content. + + :param model: The model to use for the completion. + :param content_batch: The content to complete. + :param sampling_params: The sampling parameters to use for the completion. + :param response_format: The response format to use for the completion. + :param logprobs: The logprobs to use for the completion. + :returns: A job for the completion. + """ + ... 
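A brief editorial sketch of invoking the batch completion method documented above; `batch_impl` is a placeholder for any `BatchInference` implementation and the model id is illustrative, not part of the patch.

```python
from llama_stack.apis.batch_inference.batch_inference import BatchInference
from llama_stack.apis.inference import SamplingParams


async def batch_demo(batch_impl: BatchInference) -> None:
    # InterleavedContent accepts plain strings, so a simple batch is a list of prompts.
    job = await batch_impl.completion(
        model="llama3.2:3b",  # placeholder model id
        content_batch=[
            "Write a haiku about the sea.",
            "Explain gradient descent in one sentence.",
        ],
        sampling_params=SamplingParams(),
    )
    print(job)  # a Job handle; polling and result retrieval are implementation-specific
```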
@webmethod(route="/batch-inference/chat-completion", method="POST") async def chat_completion( self, model: str, - messages_batch: List[List[Message]], - sampling_params: Optional[SamplingParams] = None, + messages_batch: list[list[Message]], + sampling_params: SamplingParams | None = None, # zero-shot tool definitions as input to the model - tools: Optional[List[ToolDefinition]] = None, - tool_choice: Optional[ToolChoice] = ToolChoice.auto, - tool_prompt_format: Optional[ToolPromptFormat] = None, - response_format: Optional[ResponseFormat] = None, - logprobs: Optional[LogProbConfig] = None, - ) -> Job: ... + tools: list[ToolDefinition] | None = None, + tool_choice: ToolChoice | None = ToolChoice.auto, + tool_prompt_format: ToolPromptFormat | None = None, + response_format: ResponseFormat | None = None, + logprobs: LogProbConfig | None = None, + ) -> Job: + """Generate chat completions for a batch of messages. + + :param model: The model to use for the chat completion. + :param messages_batch: The messages to complete. + :param sampling_params: The sampling parameters to use for the completion. + :param tools: The tools to use for the chat completion. + :param tool_choice: The tool choice to use for the chat completion. + :param tool_prompt_format: The tool prompt format to use for the chat completion. + :param response_format: The response format to use for the chat completion. + :param logprobs: The logprobs to use for the chat completion. + :returns: A job for the chat completion. + """ + ... diff --git a/llama_stack/apis/benchmarks/benchmarks.py b/llama_stack/apis/benchmarks/benchmarks.py index 809af8868..d80c767f8 100644 --- a/llama_stack/apis/benchmarks/benchmarks.py +++ b/llama_stack/apis/benchmarks/benchmarks.py @@ -3,7 +3,7 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict, List, Literal, Optional, Protocol, runtime_checkable +from typing import Any, Literal, Protocol, runtime_checkable from pydantic import BaseModel, Field @@ -13,8 +13,8 @@ from llama_stack.schema_utils import json_schema_type, webmethod class CommonBenchmarkFields(BaseModel): dataset_id: str - scoring_functions: List[str] - metadata: Dict[str, Any] = Field( + scoring_functions: list[str] + metadata: dict[str, Any] = Field( default_factory=dict, description="Metadata for this evaluation task", ) @@ -22,45 +22,66 @@ class CommonBenchmarkFields(BaseModel): @json_schema_type class Benchmark(CommonBenchmarkFields, Resource): - type: Literal[ResourceType.benchmark.value] = ResourceType.benchmark.value + type: Literal[ResourceType.benchmark] = ResourceType.benchmark @property def benchmark_id(self) -> str: return self.identifier @property - def provider_benchmark_id(self) -> str: + def provider_benchmark_id(self) -> str | None: return self.provider_resource_id class BenchmarkInput(CommonBenchmarkFields, BaseModel): benchmark_id: str - provider_id: Optional[str] = None - provider_benchmark_id: Optional[str] = None + provider_id: str | None = None + provider_benchmark_id: str | None = None class ListBenchmarksResponse(BaseModel): - data: List[Benchmark] + data: list[Benchmark] @runtime_checkable class Benchmarks(Protocol): @webmethod(route="/eval/benchmarks", method="GET") - async def list_benchmarks(self) -> ListBenchmarksResponse: ... + async def list_benchmarks(self) -> ListBenchmarksResponse: + """List all benchmarks. + + :returns: A ListBenchmarksResponse. + """ + ... 
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET") async def get_benchmark( self, benchmark_id: str, - ) -> Benchmark: ... + ) -> Benchmark: + """Get a benchmark by its ID. + + :param benchmark_id: The ID of the benchmark to get. + :returns: A Benchmark. + """ + ... @webmethod(route="/eval/benchmarks", method="POST") async def register_benchmark( self, benchmark_id: str, dataset_id: str, - scoring_functions: List[str], - provider_benchmark_id: Optional[str] = None, - provider_id: Optional[str] = None, - metadata: Optional[Dict[str, Any]] = None, - ) -> None: ... + scoring_functions: list[str], + provider_benchmark_id: str | None = None, + provider_id: str | None = None, + metadata: dict[str, Any] | None = None, + ) -> None: + """Register a benchmark. + + :param benchmark_id: The ID of the benchmark to register. + :param dataset_id: The ID of the dataset to use for the benchmark. + :param scoring_functions: The scoring functions to use for the benchmark. + :param provider_benchmark_id: The ID of the provider benchmark to use for the benchmark. + :param provider_id: The ID of the provider to use for the benchmark. + :param metadata: The metadata to use for the benchmark. + """ + ... diff --git a/llama_stack/apis/common/content_types.py b/llama_stack/apis/common/content_types.py index 9d4e21308..8bcb781f7 100644 --- a/llama_stack/apis/common/content_types.py +++ b/llama_stack/apis/common/content_types.py @@ -5,7 +5,7 @@ # the root directory of this source tree. from enum import Enum -from typing import Annotated, List, Literal, Optional, Union +from typing import Annotated, Literal from pydantic import BaseModel, Field, model_validator @@ -26,9 +26,9 @@ class _URLOrData(BaseModel): :param data: base64 encoded image data as string """ - url: Optional[URL] = None + url: URL | None = None # data is a base64 encoded string, hint with contentEncoding=base64 - data: Optional[str] = Field(contentEncoding="base64", default=None) + data: str | None = Field(default=None, json_schema_extra={"contentEncoding": "base64"}) @model_validator(mode="before") @classmethod @@ -64,13 +64,13 @@ class TextContentItem(BaseModel): # other modalities can be added here InterleavedContentItem = Annotated[ - Union[ImageContentItem, TextContentItem], + ImageContentItem | TextContentItem, Field(discriminator="type"), ] register_schema(InterleavedContentItem, name="InterleavedContentItem") # accept a single "str" as a special case since it is common -InterleavedContent = Union[str, InterleavedContentItem, List[InterleavedContentItem]] +InterleavedContent = str | InterleavedContentItem | list[InterleavedContentItem] register_schema(InterleavedContent, name="InterleavedContent") @@ -100,13 +100,13 @@ class ToolCallDelta(BaseModel): # you either send an in-progress tool call so the client can stream a long # code generation or you send the final parsed tool call at the end of the # stream - tool_call: Union[str, ToolCall] + tool_call: str | ToolCall parse_status: ToolCallParseStatus # streaming completions send a stream of ContentDeltas ContentDelta = Annotated[ - Union[TextDelta, ImageDelta, ToolCallDelta], + TextDelta | ImageDelta | ToolCallDelta, Field(discriminator="type"), ] register_schema(ContentDelta, name="ContentDelta") diff --git a/llama_stack/apis/common/deployment_types.py b/llama_stack/apis/common/deployment_types.py deleted file mode 100644 index 83eea28a2..000000000 --- a/llama_stack/apis/common/deployment_types.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. 
and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from enum import Enum -from typing import Any, Dict, Optional - -from pydantic import BaseModel - -from llama_stack.apis.common.content_types import URL -from llama_stack.schema_utils import json_schema_type - - -@json_schema_type -class RestAPIMethod(Enum): - GET = "GET" - POST = "POST" - PUT = "PUT" - DELETE = "DELETE" - - -@json_schema_type -class RestAPIExecutionConfig(BaseModel): - url: URL - method: RestAPIMethod - params: Optional[Dict[str, Any]] = None - headers: Optional[Dict[str, Any]] = None - body: Optional[Dict[str, Any]] = None diff --git a/llama_stack/apis/common/responses.py b/llama_stack/apis/common/responses.py index f9e9a4c31..5cb41e23d 100644 --- a/llama_stack/apis/common/responses.py +++ b/llama_stack/apis/common/responses.py @@ -4,13 +4,19 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict, List +from enum import Enum +from typing import Any from pydantic import BaseModel from llama_stack.schema_utils import json_schema_type +class Order(Enum): + asc = "asc" + desc = "desc" + + @json_schema_type class PaginatedResponse(BaseModel): """A generic paginated response that follows a simple format. @@ -19,5 +25,5 @@ class PaginatedResponse(BaseModel): :param has_more: Whether there are more items available after this set """ - data: List[Dict[str, Any]] + data: list[dict[str, Any]] has_more: bool diff --git a/llama_stack/apis/common/training_types.py b/llama_stack/apis/common/training_types.py index d6c6c6919..46cd101af 100644 --- a/llama_stack/apis/common/training_types.py +++ b/llama_stack/apis/common/training_types.py @@ -5,7 +5,6 @@ # the root directory of this source tree. from datetime import datetime -from typing import Optional from pydantic import BaseModel @@ -27,4 +26,4 @@ class Checkpoint(BaseModel): epoch: int post_training_job_id: str path: str - training_metrics: Optional[PostTrainingMetric] = None + training_metrics: PostTrainingMetric | None = None diff --git a/llama_stack/apis/common/type_system.py b/llama_stack/apis/common/type_system.py index 5d9f000be..db4aab4c5 100644 --- a/llama_stack/apis/common/type_system.py +++ b/llama_stack/apis/common/type_system.py @@ -4,10 +4,9 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from typing import Literal, Union +from typing import Annotated, Literal from pydantic import BaseModel, Field -from typing_extensions import Annotated from llama_stack.schema_utils import json_schema_type, register_schema @@ -73,18 +72,16 @@ class DialogType(BaseModel): ParamType = Annotated[ - Union[ - StringType, - NumberType, - BooleanType, - ArrayType, - ObjectType, - JsonType, - UnionType, - ChatCompletionInputType, - CompletionInputType, - AgentTurnInputType, - ], + StringType + | NumberType + | BooleanType + | ArrayType + | ObjectType + | JsonType + | UnionType + | ChatCompletionInputType + | CompletionInputType + | AgentTurnInputType, Field(discriminator="type"), ] register_schema(ParamType, name="ParamType") diff --git a/llama_stack/apis/datasetio/datasetio.py b/llama_stack/apis/datasetio/datasetio.py index 6331882fb..1183983cc 100644 --- a/llama_stack/apis/datasetio/datasetio.py +++ b/llama_stack/apis/datasetio/datasetio.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict, List, Optional, Protocol, runtime_checkable +from typing import Any, Protocol, runtime_checkable from llama_stack.apis.common.responses import PaginatedResponse from llama_stack.apis.datasets import Dataset @@ -24,8 +24,8 @@ class DatasetIO(Protocol): async def iterrows( self, dataset_id: str, - start_index: Optional[int] = None, - limit: Optional[int] = None, + start_index: int | None = None, + limit: int | None = None, ) -> PaginatedResponse: """Get a paginated list of rows from a dataset. @@ -34,14 +34,21 @@ class DatasetIO(Protocol): - limit: Number of items to return. If None or -1, returns all items. The response includes: - - data: List of items for the current page - - has_more: Whether there are more items available after this set + - data: List of items for the current page. + - has_more: Whether there are more items available after this set. :param dataset_id: The ID of the dataset to get the rows from. :param start_index: Index into dataset for the first row to get. Get all rows if None. :param limit: The number of rows to get. + :returns: A PaginatedResponse. """ ... @webmethod(route="/datasetio/append-rows/{dataset_id:path}", method="POST") - async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None: ... + async def append_rows(self, dataset_id: str, rows: list[dict[str, Any]]) -> None: + """Append rows to a dataset. + + :param dataset_id: The ID of the dataset to append the rows to. + :param rows: The rows to append to the dataset. + """ + ... diff --git a/llama_stack/apis/datasets/datasets.py b/llama_stack/apis/datasets/datasets.py index 32ccde144..e3de3d5cb 100644 --- a/llama_stack/apis/datasets/datasets.py +++ b/llama_stack/apis/datasets/datasets.py @@ -5,7 +5,7 @@ # the root directory of this source tree. 
from enum import Enum -from typing import Annotated, Any, Dict, List, Literal, Optional, Protocol, Union +from typing import Annotated, Any, Literal, Protocol from pydantic import BaseModel, Field @@ -81,11 +81,11 @@ class RowsDataSource(BaseModel): """ type: Literal["rows"] = "rows" - rows: List[Dict[str, Any]] + rows: list[dict[str, Any]] DataSource = Annotated[ - Union[URIDataSource, RowsDataSource], + URIDataSource | RowsDataSource, Field(discriminator="type"), ] register_schema(DataSource, name="DataSource") @@ -98,7 +98,7 @@ class CommonDatasetFields(BaseModel): purpose: DatasetPurpose source: DataSource - metadata: Dict[str, Any] = Field( + metadata: dict[str, Any] = Field( default_factory=dict, description="Any additional metadata for this dataset", ) @@ -106,14 +106,14 @@ class CommonDatasetFields(BaseModel): @json_schema_type class Dataset(CommonDatasetFields, Resource): - type: Literal[ResourceType.dataset.value] = ResourceType.dataset.value + type: Literal[ResourceType.dataset] = ResourceType.dataset @property def dataset_id(self) -> str: return self.identifier @property - def provider_dataset_id(self) -> str: + def provider_dataset_id(self) -> str | None: return self.provider_resource_id @@ -122,7 +122,7 @@ class DatasetInput(CommonDatasetFields, BaseModel): class ListDatasetsResponse(BaseModel): - data: List[Dataset] + data: list[Dataset] class Datasets(Protocol): @@ -131,13 +131,14 @@ class Datasets(Protocol): self, purpose: DatasetPurpose, source: DataSource, - metadata: Optional[Dict[str, Any]] = None, - dataset_id: Optional[str] = None, + metadata: dict[str, Any] | None = None, + dataset_id: str | None = None, ) -> Dataset: """ Register a new dataset. - :param purpose: The purpose of the dataset. One of + :param purpose: The purpose of the dataset. + One of: - "post-training/messages": The dataset contains a messages column with list of messages for post-training. { "messages": [ @@ -188,8 +189,9 @@ class Datasets(Protocol): ] } :param metadata: The metadata for the dataset. - - E.g. {"description": "My dataset"} + - E.g. {"description": "My dataset"}. :param dataset_id: The ID of the dataset. If not provided, an ID will be generated. + :returns: A Dataset. """ ... @@ -197,13 +199,29 @@ class Datasets(Protocol): async def get_dataset( self, dataset_id: str, - ) -> Dataset: ... + ) -> Dataset: + """Get a dataset by its ID. + + :param dataset_id: The ID of the dataset to get. + :returns: A Dataset. + """ + ... @webmethod(route="/datasets", method="GET") - async def list_datasets(self) -> ListDatasetsResponse: ... + async def list_datasets(self) -> ListDatasetsResponse: + """List all datasets. + + :returns: A ListDatasetsResponse. + """ + ... @webmethod(route="/datasets/{dataset_id:path}", method="DELETE") async def unregister_dataset( self, dataset_id: str, - ) -> None: ... + ) -> None: + """Unregister a dataset by its ID. + + :param dataset_id: The ID of the dataset to unregister. + """ + ... diff --git a/llama_stack/apis/datatypes.py b/llama_stack/apis/datatypes.py index 25f3ab1ab..63a764725 100644 --- a/llama_stack/apis/datatypes.py +++ b/llama_stack/apis/datatypes.py @@ -5,7 +5,6 @@ # the root directory of this source tree. 
from enum import Enum -from typing import Optional from pydantic import BaseModel @@ -54,4 +53,4 @@ class Error(BaseModel): status: int title: str detail: str - instance: Optional[str] = None + instance: str | None = None diff --git a/llama_stack/apis/eval/eval.py b/llama_stack/apis/eval/eval.py index 0e5959c37..83a0a8e56 100644 --- a/llama_stack/apis/eval/eval.py +++ b/llama_stack/apis/eval/eval.py @@ -4,10 +4,9 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict, List, Literal, Optional, Protocol, Union +from typing import Annotated, Any, Literal, Protocol from pydantic import BaseModel, Field -from typing_extensions import Annotated from llama_stack.apis.agents import AgentConfig from llama_stack.apis.common.job_types import Job @@ -29,7 +28,7 @@ class ModelCandidate(BaseModel): type: Literal["model"] = "model" model: str sampling_params: SamplingParams - system_message: Optional[SystemMessage] = None + system_message: SystemMessage | None = None @json_schema_type @@ -43,7 +42,7 @@ class AgentCandidate(BaseModel): config: AgentConfig -EvalCandidate = Annotated[Union[ModelCandidate, AgentCandidate], Field(discriminator="type")] +EvalCandidate = Annotated[ModelCandidate | AgentCandidate, Field(discriminator="type")] register_schema(EvalCandidate, name="EvalCandidate") @@ -57,11 +56,11 @@ class BenchmarkConfig(BaseModel): """ eval_candidate: EvalCandidate - scoring_params: Dict[str, ScoringFnParams] = Field( + scoring_params: dict[str, ScoringFnParams] = Field( description="Map between scoring function id and parameters for each scoring function you want to run", default_factory=dict, ) - num_examples: Optional[int] = Field( + num_examples: int | None = Field( description="Number of examples to evaluate (useful for testing), if not provided, all examples in the dataset will be evaluated", default=None, ) @@ -76,9 +75,9 @@ class EvaluateResponse(BaseModel): :param scores: The scores from the evaluation. """ - generations: List[Dict[str, Any]] + generations: list[dict[str, Any]] # each key in the dict is a scoring function name - scores: Dict[str, ScoringResult] + scores: dict[str, ScoringResult] class Eval(Protocol): @@ -94,15 +93,16 @@ class Eval(Protocol): :param benchmark_id: The ID of the benchmark to run the evaluation on. :param benchmark_config: The configuration for the benchmark. - :return: The job that was created to run the evaluation. + :returns: The job that was created to run the evaluation. """ + ... @webmethod(route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST") async def evaluate_rows( self, benchmark_id: str, - input_rows: List[Dict[str, Any]], - scoring_functions: List[str], + input_rows: list[dict[str, Any]], + scoring_functions: list[str], benchmark_config: BenchmarkConfig, ) -> EvaluateResponse: """Evaluate a list of rows on a benchmark. @@ -111,8 +111,9 @@ class Eval(Protocol): :param input_rows: The rows to evaluate. :param scoring_functions: The scoring functions to use for the evaluation. :param benchmark_config: The configuration for the benchmark. - :return: EvaluateResponse object containing generations and scores + :returns: EvaluateResponse object containing generations and scores. """ + ... 
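A brief editorial sketch of wiring the eval types above into a call; `eval_impl`, the benchmark id, the model id, the row keys, and the scoring function id are all placeholders rather than values defined by this patch.

```python
from llama_stack.apis.eval.eval import BenchmarkConfig, Eval, ModelCandidate
from llama_stack.apis.inference import SamplingParams


async def eval_demo(eval_impl: Eval) -> None:
    config = BenchmarkConfig(
        eval_candidate=ModelCandidate(
            model="llama3.2:3b",  # placeholder model id
            sampling_params=SamplingParams(),
        ),
        num_examples=5,  # keep runs small while testing
    )

    # Evaluate a handful of rows directly and read back per-scorer results.
    result = await eval_impl.evaluate_rows(
        benchmark_id="my-benchmark",  # placeholder benchmark id
        input_rows=[{"input_query": "2 + 2?", "expected_answer": "4"}],  # illustrative row
        scoring_functions=["basic::equality"],  # illustrative scoring function id
        benchmark_config=config,
    )
    print(result.scores.keys())

    # Or launch a full benchmark run asynchronously and track it as a Job.
    job = await eval_impl.run_eval(benchmark_id="my-benchmark", benchmark_config=config)
    print(job)
```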
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET") async def job_status(self, benchmark_id: str, job_id: str) -> Job: @@ -120,7 +121,7 @@ class Eval(Protocol): :param benchmark_id: The ID of the benchmark to run the evaluation on. :param job_id: The ID of the job to get the status of. - :return: The status of the evaluationjob. + :returns: The status of the evaluation job. """ ... @@ -139,5 +140,6 @@ class Eval(Protocol): :param benchmark_id: The ID of the benchmark to run the evaluation on. :param job_id: The ID of the job to get the result of. - :return: The result of the job. + :returns: The result of the job. """ + ... diff --git a/llama_stack/apis/files/files.py b/llama_stack/apis/files/files.py index ef8b65829..1d762a68a 100644 --- a/llama_stack/apis/files/files.py +++ b/llama_stack/apis/files/files.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import List, Optional, Protocol, runtime_checkable +from typing import Protocol, runtime_checkable from pydantic import BaseModel @@ -42,7 +42,7 @@ class ListBucketResponse(BaseModel): :param data: List of FileResponse entries """ - data: List[BucketResponse] + data: list[BucketResponse] @json_schema_type @@ -74,7 +74,7 @@ class ListFileResponse(BaseModel): :param data: List of FileResponse entries """ - data: List[FileResponse] + data: list[FileResponse] @runtime_checkable @@ -91,10 +91,11 @@ class Files(Protocol): """ Create a new upload session for a file identified by a bucket and key. - :param bucket: Bucket under which the file is stored (valid chars: a-zA-Z0-9_-) - :param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.) - :param mime_type: MIME type of the file - :param size: File size in bytes + :param bucket: Bucket under which the file is stored (valid chars: a-zA-Z0-9_-). + :param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.). + :param mime_type: MIME type of the file. + :param size: File size in bytes. + :returns: A FileUploadResponse. """ ... @@ -102,12 +103,13 @@ class Files(Protocol): async def upload_content_to_session( self, upload_id: str, - ) -> Optional[FileResponse]: + ) -> FileResponse | None: """ Upload file content to an existing upload session. On the server, request body will have the raw bytes that are uploaded. - :param upload_id: ID of the upload session + :param upload_id: ID of the upload session. + :returns: A FileResponse or None if the upload is not complete. """ ... @@ -117,9 +119,10 @@ class Files(Protocol): upload_id: str, ) -> FileUploadResponse: """ - Returns information about an existsing upload session + Returns information about an existsing upload session. - :param upload_id: ID of the upload session + :param upload_id: ID of the upload session. + :returns: A FileUploadResponse. """ ... @@ -130,6 +133,9 @@ class Files(Protocol): ) -> ListBucketResponse: """ List all buckets. + + :param bucket: Bucket name (valid chars: a-zA-Z0-9_-). + :returns: A ListBucketResponse. """ ... @@ -141,7 +147,8 @@ class Files(Protocol): """ List all files in a bucket. - :param bucket: Bucket name (valid chars: a-zA-Z0-9_-) + :param bucket: Bucket name (valid chars: a-zA-Z0-9_-). + :returns: A ListFileResponse. """ ... @@ -154,8 +161,9 @@ class Files(Protocol): """ Get a file info identified by a bucket and key. - :param bucket: Bucket name (valid chars: a-zA-Z0-9_-) - :param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.) 
+ :param bucket: Bucket name (valid chars: a-zA-Z0-9_-). + :param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.). + :returns: A FileResponse. """ ... @@ -168,7 +176,7 @@ class Files(Protocol): """ Delete a file identified by a bucket and key. - :param bucket: Bucket name (valid chars: a-zA-Z0-9_-) - :param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.) + :param bucket: Bucket name (valid chars: a-zA-Z0-9_-). + :param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.). """ ... diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py index 21753ca23..74697dd18 100644 --- a/llama_stack/apis/inference/inference.py +++ b/llama_stack/apis/inference/inference.py @@ -4,23 +4,22 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +import sys +from collections.abc import AsyncIterator from enum import Enum from typing import ( + Annotated, Any, - AsyncIterator, - Dict, - List, Literal, - Optional, Protocol, - Union, runtime_checkable, ) from pydantic import BaseModel, Field, field_validator -from typing_extensions import Annotated +from typing_extensions import TypedDict from llama_stack.apis.common.content_types import ContentDelta, InterleavedContent, InterleavedContentItem +from llama_stack.apis.common.responses import Order from llama_stack.apis.models import Model from llama_stack.apis.telemetry.telemetry import MetricResponseMixin from llama_stack.models.llama.datatypes import ( @@ -38,6 +37,16 @@ register_schema(ToolCall) register_schema(ToolParamDefinition) register_schema(ToolDefinition) +# TODO: use enum.StrEnum when we drop support for python 3.10 +if sys.version_info >= (3, 11): + from enum import StrEnum +else: + + class StrEnum(str, Enum): + """Backport of StrEnum for Python 3.10 and below.""" + + pass + @json_schema_type class GreedySamplingStrategy(BaseModel): @@ -47,8 +56,8 @@ class GreedySamplingStrategy(BaseModel): @json_schema_type class TopPSamplingStrategy(BaseModel): type: Literal["top_p"] = "top_p" - temperature: Optional[float] = Field(..., gt=0.0) - top_p: Optional[float] = 0.95 + temperature: float | None = Field(..., gt=0.0) + top_p: float | None = 0.95 @json_schema_type @@ -58,7 +67,7 @@ class TopKSamplingStrategy(BaseModel): SamplingStrategy = Annotated[ - Union[GreedySamplingStrategy, TopPSamplingStrategy, TopKSamplingStrategy], + GreedySamplingStrategy | TopPSamplingStrategy | TopKSamplingStrategy, Field(discriminator="type"), ] register_schema(SamplingStrategy, name="SamplingStrategy") @@ -79,9 +88,9 @@ class SamplingParams(BaseModel): strategy: SamplingStrategy = Field(default_factory=GreedySamplingStrategy) - max_tokens: Optional[int] = 0 - repetition_penalty: Optional[float] = 1.0 - stop: Optional[List[str]] = None + max_tokens: int | None = 0 + repetition_penalty: float | None = 1.0 + stop: list[str] | None = None class LogProbConfig(BaseModel): @@ -90,7 +99,7 @@ class LogProbConfig(BaseModel): :param top_k: How many tokens (for each position) to return log probabilities for. 
""" - top_k: Optional[int] = 0 + top_k: int | None = 0 class QuantizationType(Enum): @@ -125,11 +134,11 @@ class Int4QuantizationConfig(BaseModel): """ type: Literal["int4_mixed"] = "int4_mixed" - scheme: Optional[str] = "int4_weight_int8_dynamic_activation" + scheme: str | None = "int4_weight_int8_dynamic_activation" QuantizationConfig = Annotated[ - Union[Bf16QuantizationConfig, Fp8QuantizationConfig, Int4QuantizationConfig], + Bf16QuantizationConfig | Fp8QuantizationConfig | Int4QuantizationConfig, Field(discriminator="type"), ] @@ -145,7 +154,7 @@ class UserMessage(BaseModel): role: Literal["user"] = "user" content: InterleavedContent - context: Optional[InterleavedContent] = None + context: InterleavedContent | None = None @json_schema_type @@ -190,16 +199,11 @@ class CompletionMessage(BaseModel): role: Literal["assistant"] = "assistant" content: InterleavedContent stop_reason: StopReason - tool_calls: Optional[List[ToolCall]] = Field(default_factory=list) + tool_calls: list[ToolCall] | None = Field(default_factory=lambda: []) Message = Annotated[ - Union[ - UserMessage, - SystemMessage, - ToolResponseMessage, - CompletionMessage, - ], + UserMessage | SystemMessage | ToolResponseMessage | CompletionMessage, Field(discriminator="role"), ] register_schema(Message, name="Message") @@ -208,9 +212,9 @@ register_schema(Message, name="Message") @json_schema_type class ToolResponse(BaseModel): call_id: str - tool_name: Union[BuiltinTool, str] + tool_name: BuiltinTool | str content: InterleavedContent - metadata: Optional[Dict[str, Any]] = None + metadata: dict[str, Any] | None = None @field_validator("tool_name", mode="before") @classmethod @@ -243,7 +247,7 @@ class TokenLogProbs(BaseModel): :param logprobs_by_token: Dictionary mapping tokens to their log probabilities """ - logprobs_by_token: Dict[str, float] + logprobs_by_token: dict[str, float] class ChatCompletionResponseEventType(Enum): @@ -271,11 +275,11 @@ class ChatCompletionResponseEvent(BaseModel): event_type: ChatCompletionResponseEventType delta: ContentDelta - logprobs: Optional[List[TokenLogProbs]] = None - stop_reason: Optional[StopReason] = None + logprobs: list[TokenLogProbs] | None = None + stop_reason: StopReason | None = None -class ResponseFormatType(Enum): +class ResponseFormatType(StrEnum): """Types of formats for structured (guided) decoding. :cvar json_schema: Response should conform to a JSON schema. In a Python SDK, this is often a `pydantic` model. @@ -294,8 +298,8 @@ class JsonSchemaResponseFormat(BaseModel): :param json_schema: The JSON schema the response should conform to. In a Python SDK, this is often a `pydantic` model. 
""" - type: Literal[ResponseFormatType.json_schema.value] = ResponseFormatType.json_schema.value - json_schema: Dict[str, Any] + type: Literal[ResponseFormatType.json_schema] = ResponseFormatType.json_schema + json_schema: dict[str, Any] @json_schema_type @@ -306,12 +310,12 @@ class GrammarResponseFormat(BaseModel): :param bnf: The BNF grammar specification the response should conform to """ - type: Literal[ResponseFormatType.grammar.value] = ResponseFormatType.grammar.value - bnf: Dict[str, Any] + type: Literal[ResponseFormatType.grammar] = ResponseFormatType.grammar + bnf: dict[str, Any] ResponseFormat = Annotated[ - Union[JsonSchemaResponseFormat, GrammarResponseFormat], + JsonSchemaResponseFormat | GrammarResponseFormat, Field(discriminator="type"), ] register_schema(ResponseFormat, name="ResponseFormat") @@ -321,10 +325,10 @@ register_schema(ResponseFormat, name="ResponseFormat") class CompletionRequest(BaseModel): model: str content: InterleavedContent - sampling_params: Optional[SamplingParams] = Field(default_factory=SamplingParams) - response_format: Optional[ResponseFormat] = None - stream: Optional[bool] = False - logprobs: Optional[LogProbConfig] = None + sampling_params: SamplingParams | None = Field(default_factory=SamplingParams) + response_format: ResponseFormat | None = None + stream: bool | None = False + logprobs: LogProbConfig | None = None @json_schema_type @@ -338,7 +342,7 @@ class CompletionResponse(MetricResponseMixin): content: str stop_reason: StopReason - logprobs: Optional[List[TokenLogProbs]] = None + logprobs: list[TokenLogProbs] | None = None @json_schema_type @@ -351,8 +355,8 @@ class CompletionResponseStreamChunk(MetricResponseMixin): """ delta: str - stop_reason: Optional[StopReason] = None - logprobs: Optional[List[TokenLogProbs]] = None + stop_reason: StopReason | None = None + logprobs: list[TokenLogProbs] | None = None class SystemMessageBehavior(Enum): @@ -383,9 +387,9 @@ class ToolConfig(BaseModel): '{{function_definitions}}' to indicate where the function definitions should be inserted. 
""" - tool_choice: Optional[ToolChoice | str] = Field(default=ToolChoice.auto) - tool_prompt_format: Optional[ToolPromptFormat] = Field(default=None) - system_message_behavior: Optional[SystemMessageBehavior] = Field(default=SystemMessageBehavior.append) + tool_choice: ToolChoice | str | None = Field(default=ToolChoice.auto) + tool_prompt_format: ToolPromptFormat | None = Field(default=None) + system_message_behavior: SystemMessageBehavior | None = Field(default=SystemMessageBehavior.append) def model_post_init(self, __context: Any) -> None: if isinstance(self.tool_choice, str): @@ -399,15 +403,15 @@ class ToolConfig(BaseModel): @json_schema_type class ChatCompletionRequest(BaseModel): model: str - messages: List[Message] - sampling_params: Optional[SamplingParams] = Field(default_factory=SamplingParams) + messages: list[Message] + sampling_params: SamplingParams | None = Field(default_factory=SamplingParams) - tools: Optional[List[ToolDefinition]] = Field(default_factory=list) - tool_config: Optional[ToolConfig] = Field(default_factory=ToolConfig) + tools: list[ToolDefinition] | None = Field(default_factory=lambda: []) + tool_config: ToolConfig | None = Field(default_factory=ToolConfig) - response_format: Optional[ResponseFormat] = None - stream: Optional[bool] = False - logprobs: Optional[LogProbConfig] = None + response_format: ResponseFormat | None = None + stream: bool | None = False + logprobs: LogProbConfig | None = None @json_schema_type @@ -429,7 +433,7 @@ class ChatCompletionResponse(MetricResponseMixin): """ completion_message: CompletionMessage - logprobs: Optional[List[TokenLogProbs]] = None + logprobs: list[TokenLogProbs] | None = None @json_schema_type @@ -439,7 +443,35 @@ class EmbeddingsResponse(BaseModel): :param embeddings: List of embedding vectors, one per input content. Each embedding is a list of floats. 
The dimensionality of the embedding is model-specific; you can check model metadata using /models/{model_id} """ - embeddings: List[List[float]] + embeddings: list[list[float]] + + +@json_schema_type +class OpenAIChatCompletionContentPartTextParam(BaseModel): + type: Literal["text"] = "text" + text: str + + +@json_schema_type +class OpenAIImageURL(BaseModel): + url: str + detail: str | None = None + + +@json_schema_type +class OpenAIChatCompletionContentPartImageParam(BaseModel): + type: Literal["image_url"] = "image_url" + image_url: OpenAIImageURL + + +OpenAIChatCompletionContentPartParam = Annotated[ + OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam, + Field(discriminator="type"), +] +register_schema(OpenAIChatCompletionContentPartParam, name="OpenAIChatCompletionContentPartParam") + + +OpenAIChatCompletionMessageContent = str | list[OpenAIChatCompletionContentPartParam] @json_schema_type @@ -452,8 +484,8 @@ class OpenAIUserMessageParam(BaseModel): """ role: Literal["user"] = "user" - content: InterleavedContent - name: Optional[str] = None + content: OpenAIChatCompletionMessageContent + name: str | None = None @json_schema_type @@ -466,8 +498,22 @@ class OpenAISystemMessageParam(BaseModel): """ role: Literal["system"] = "system" - content: InterleavedContent - name: Optional[str] = None + content: OpenAIChatCompletionMessageContent + name: str | None = None + + +@json_schema_type +class OpenAIChatCompletionToolCallFunction(BaseModel): + name: str | None = None + arguments: str | None = None + + +@json_schema_type +class OpenAIChatCompletionToolCall(BaseModel): + index: int | None = None + id: str | None = None + type: Literal["function"] = "function" + function: OpenAIChatCompletionToolCallFunction | None = None @json_schema_type @@ -477,13 +523,13 @@ class OpenAIAssistantMessageParam(BaseModel): :param role: Must be "assistant" to identify this as the model's response :param content: The content of the model's response :param name: (Optional) The name of the assistant message participant. - :param tool_calls: List of tool calls. Each tool call is a ToolCall object. + :param tool_calls: List of tool calls. Each tool call is an OpenAIChatCompletionToolCall object. 
""" role: Literal["assistant"] = "assistant" - content: InterleavedContent - name: Optional[str] = None - tool_calls: Optional[List[ToolCall]] = Field(default_factory=list) + content: OpenAIChatCompletionMessageContent | None = None + name: str | None = None + tool_calls: list[OpenAIChatCompletionToolCall] | None = None @json_schema_type @@ -497,7 +543,7 @@ class OpenAIToolMessageParam(BaseModel): role: Literal["tool"] = "tool" tool_call_id: str - content: InterleavedContent + content: OpenAIChatCompletionMessageContent @json_schema_type @@ -510,23 +556,57 @@ class OpenAIDeveloperMessageParam(BaseModel): """ role: Literal["developer"] = "developer" - content: InterleavedContent - name: Optional[str] = None + content: OpenAIChatCompletionMessageContent + name: str | None = None OpenAIMessageParam = Annotated[ - Union[ - OpenAIUserMessageParam, - OpenAISystemMessageParam, - OpenAIAssistantMessageParam, - OpenAIToolMessageParam, - OpenAIDeveloperMessageParam, - ], + OpenAIUserMessageParam + | OpenAISystemMessageParam + | OpenAIAssistantMessageParam + | OpenAIToolMessageParam + | OpenAIDeveloperMessageParam, Field(discriminator="role"), ] register_schema(OpenAIMessageParam, name="OpenAIMessageParam") +@json_schema_type +class OpenAIResponseFormatText(BaseModel): + type: Literal["text"] = "text" + + +@json_schema_type +class OpenAIJSONSchema(TypedDict, total=False): + name: str + description: str | None + strict: bool | None + + # Pydantic BaseModel cannot be used with a schema param, since it already + # has one. And, we don't want to alias here because then have to handle + # that alias when converting to OpenAI params. So, to support schema, + # we use a TypedDict. + schema: dict[str, Any] | None + + +@json_schema_type +class OpenAIResponseFormatJSONSchema(BaseModel): + type: Literal["json_schema"] = "json_schema" + json_schema: OpenAIJSONSchema + + +@json_schema_type +class OpenAIResponseFormatJSONObject(BaseModel): + type: Literal["json_object"] = "json_object" + + +OpenAIResponseFormatParam = Annotated[ + OpenAIResponseFormatText | OpenAIResponseFormatJSONSchema | OpenAIResponseFormatJSONObject, + Field(discriminator="type"), +] +register_schema(OpenAIResponseFormatParam, name="OpenAIResponseFormatParam") + + @json_schema_type class OpenAITopLogProb(BaseModel): """The top log probability for a token from an OpenAI-compatible chat completion response. @@ -537,7 +617,7 @@ class OpenAITopLogProb(BaseModel): """ token: str - bytes: Optional[List[int]] = None + bytes: list[int] | None = None logprob: float @@ -552,21 +632,53 @@ class OpenAITokenLogProb(BaseModel): """ token: str - bytes: Optional[List[int]] = None + bytes: list[int] | None = None logprob: float - top_logprobs: List[OpenAITopLogProb] + top_logprobs: list[OpenAITopLogProb] @json_schema_type class OpenAIChoiceLogprobs(BaseModel): """The log probabilities for the tokens in the message from an OpenAI-compatible chat completion response. 
- :content: (Optional) The log probabilities for the tokens in the message - :refusal: (Optional) The log probabilities for the tokens in the message + :param content: (Optional) The log probabilities for the tokens in the message + :param refusal: (Optional) The log probabilities for the tokens in the message """ - content: Optional[List[OpenAITokenLogProb]] = None - refusal: Optional[List[OpenAITokenLogProb]] = None + content: list[OpenAITokenLogProb] | None = None + refusal: list[OpenAITokenLogProb] | None = None + + +@json_schema_type +class OpenAIChoiceDelta(BaseModel): + """A delta from an OpenAI-compatible chat completion streaming response. + + :param content: (Optional) The content of the delta + :param refusal: (Optional) The refusal of the delta + :param role: (Optional) The role of the delta + :param tool_calls: (Optional) The tool calls of the delta + """ + + content: str | None = None + refusal: str | None = None + role: str | None = None + tool_calls: list[OpenAIChatCompletionToolCall] | None = None + + +@json_schema_type +class OpenAIChunkChoice(BaseModel): + """A chunk choice from an OpenAI-compatible chat completion streaming response. + + :param delta: The delta from the chunk + :param finish_reason: The reason the model stopped generating + :param index: The index of the choice + :param logprobs: (Optional) The log probabilities for the tokens in the message + """ + + delta: OpenAIChoiceDelta + finish_reason: str + index: int + logprobs: OpenAIChoiceLogprobs | None = None @json_schema_type @@ -575,14 +687,14 @@ class OpenAIChoice(BaseModel): :param message: The message from the model :param finish_reason: The reason the model stopped generating - :index: The index of the choice - :logprobs: (Optional) The log probabilities for the tokens in the message + :param index: The index of the choice + :param logprobs: (Optional) The log probabilities for the tokens in the message """ message: OpenAIMessageParam finish_reason: str index: int - logprobs: Optional[OpenAIChoiceLogprobs] = None + logprobs: OpenAIChoiceLogprobs | None = None @json_schema_type @@ -597,12 +709,30 @@ class OpenAIChatCompletion(BaseModel): """ id: str - choices: List[OpenAIChoice] + choices: list[OpenAIChoice] object: Literal["chat.completion"] = "chat.completion" created: int model: str +@json_schema_type +class OpenAIChatCompletionChunk(BaseModel): + """Chunk from a streaming response to an OpenAI-compatible chat completion request. + + :param id: The ID of the chat completion + :param choices: List of choices + :param object: The object type, which will be "chat.completion.chunk" + :param created: The Unix timestamp in seconds when the chat completion was created + :param model: The model that was used to generate the chat completion + """ + + id: str + choices: list[OpenAIChunkChoice] + object: Literal["chat.completion.chunk"] = "chat.completion.chunk" + created: int + model: str + + @json_schema_type class OpenAICompletionLogprobs(BaseModel): """The log probabilities for the tokens in the message from an OpenAI-compatible completion response. 
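The chunk-related models added above are plain Pydantic classes, so a provider emitting a streaming response would build one OpenAIChatCompletionChunk per SSE event. A minimal sketch follows (illustrative only, not part of the patch; the id and model strings are placeholders):

# Illustrative only -- a sketch of assembling one streaming chunk from the
# OpenAIChatCompletionChunk / OpenAIChunkChoice / OpenAIChoiceDelta models added above.
import time

from llama_stack.apis.inference.inference import (
    OpenAIChatCompletionChunk,
    OpenAIChoiceDelta,
    OpenAIChunkChoice,
)

chunk = OpenAIChatCompletionChunk(
    id="chatcmpl-placeholder",          # placeholder completion id
    created=int(time.time()),           # Unix timestamp in seconds
    model="example-model",              # placeholder model identifier
    choices=[
        OpenAIChunkChoice(
            index=0,
            delta=OpenAIChoiceDelta(role="assistant", content="Hello"),
            finish_reason="stop",
        )
    ],
)
# `object` defaults to "chat.completion.chunk", so it is not passed explicitly.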
@@ -613,10 +743,10 @@ class OpenAICompletionLogprobs(BaseModel): :top_logprobs: (Optional) The top log probabilities for the tokens """ - text_offset: Optional[List[int]] = None - token_logprobs: Optional[List[float]] = None - tokens: Optional[List[str]] = None - top_logprobs: Optional[List[Dict[str, float]]] = None + text_offset: list[int] | None = None + token_logprobs: list[float] | None = None + tokens: list[str] | None = None + top_logprobs: list[dict[str, float]] | None = None @json_schema_type @@ -632,7 +762,7 @@ class OpenAICompletionChoice(BaseModel): finish_reason: str text: str index: int - logprobs: Optional[OpenAIChoiceLogprobs] = None + logprobs: OpenAIChoiceLogprobs | None = None @json_schema_type @@ -647,12 +777,54 @@ class OpenAICompletion(BaseModel): """ id: str - choices: List[OpenAICompletionChoice] + choices: list[OpenAICompletionChoice] created: int model: str object: Literal["text_completion"] = "text_completion" +@json_schema_type +class OpenAIEmbeddingData(BaseModel): + """A single embedding data object from an OpenAI-compatible embeddings response. + + :param object: The object type, which will be "embedding" + :param embedding: The embedding vector as a list of floats (when encoding_format="float") or as a base64-encoded string (when encoding_format="base64") + :param index: The index of the embedding in the input list + """ + + object: Literal["embedding"] = "embedding" + embedding: list[float] | str + index: int + + +@json_schema_type +class OpenAIEmbeddingUsage(BaseModel): + """Usage information for an OpenAI-compatible embeddings response. + + :param prompt_tokens: The number of tokens in the input + :param total_tokens: The total number of tokens used + """ + + prompt_tokens: int + total_tokens: int + + +@json_schema_type +class OpenAIEmbeddingsResponse(BaseModel): + """Response from an OpenAI-compatible embeddings request. + + :param object: The object type, which will be "list" + :param data: List of embedding data objects + :param model: The model that was used to generate the embeddings + :param usage: Usage information + """ + + object: Literal["list"] = "list" + data: list[OpenAIEmbeddingData] + model: str + usage: OpenAIEmbeddingUsage + + class ModelStore(Protocol): async def get_model(self, identifier: str) -> Model: ... @@ -683,23 +855,35 @@ class EmbeddingTaskType(Enum): @json_schema_type class BatchCompletionResponse(BaseModel): - batch: List[CompletionResponse] + batch: list[CompletionResponse] @json_schema_type class BatchChatCompletionResponse(BaseModel): - batch: List[ChatCompletionResponse] + batch: list[ChatCompletionResponse] + + +class OpenAICompletionWithInputMessages(OpenAIChatCompletion): + input_messages: list[OpenAIMessageParam] + + +@json_schema_type +class ListOpenAIChatCompletionResponse(BaseModel): + data: list[OpenAICompletionWithInputMessages] + has_more: bool + first_id: str + last_id: str + object: Literal["list"] = "list" @runtime_checkable @trace_protocol -class Inference(Protocol): - """Llama Stack Inference API for generating completions, chat completions, and embeddings. - - This API provides the raw interface to the underlying models. Two kinds of models are supported: - - LLM models: these models generate "raw" and "chat" (conversational) completions. - - Embedding models: these models generate embeddings to be used for semantic search. +class InferenceProvider(Protocol): """ + This protocol defines the interface that should be implemented by all inference providers. 
+ """ + + API_NAMESPACE: str = "Inference" model_store: ModelStore | None = None @@ -708,21 +892,21 @@ class Inference(Protocol): self, model_id: str, content: InterleavedContent, - sampling_params: Optional[SamplingParams] = None, - response_format: Optional[ResponseFormat] = None, - stream: Optional[bool] = False, - logprobs: Optional[LogProbConfig] = None, - ) -> Union[CompletionResponse, AsyncIterator[CompletionResponseStreamChunk]]: + sampling_params: SamplingParams | None = None, + response_format: ResponseFormat | None = None, + stream: bool | None = False, + logprobs: LogProbConfig | None = None, + ) -> CompletionResponse | AsyncIterator[CompletionResponseStreamChunk]: """Generate a completion for the given content using the specified model. :param model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. - :param content: The content to generate a completion for - :param sampling_params: (Optional) Parameters to control the sampling strategy - :param response_format: (Optional) Grammar specification for guided (structured) decoding + :param content: The content to generate a completion for. + :param sampling_params: (Optional) Parameters to control the sampling strategy. + :param response_format: (Optional) Grammar specification for guided (structured) decoding. :param stream: (Optional) If True, generate an SSE event stream of the response. Defaults to False. :param logprobs: (Optional) If specified, log probabilities for each token position will be returned. :returns: If stream=False, returns a CompletionResponse with the full completion. - If stream=True, returns an SSE event stream of CompletionResponseStreamChunk + If stream=True, returns an SSE event stream of CompletionResponseStreamChunk. """ ... @@ -730,33 +914,42 @@ class Inference(Protocol): async def batch_completion( self, model_id: str, - content_batch: List[InterleavedContent], - sampling_params: Optional[SamplingParams] = None, - response_format: Optional[ResponseFormat] = None, - logprobs: Optional[LogProbConfig] = None, + content_batch: list[InterleavedContent], + sampling_params: SamplingParams | None = None, + response_format: ResponseFormat | None = None, + logprobs: LogProbConfig | None = None, ) -> BatchCompletionResponse: + """Generate completions for a batch of content using the specified model. + + :param model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. + :param content_batch: The content to generate completions for. + :param sampling_params: (Optional) Parameters to control the sampling strategy. + :param response_format: (Optional) Grammar specification for guided (structured) decoding. + :param logprobs: (Optional) If specified, log probabilities for each token position will be returned. + :returns: A BatchCompletionResponse with the full completions. 
+ """ raise NotImplementedError("Batch completion is not implemented") @webmethod(route="/inference/chat-completion", method="POST") async def chat_completion( self, model_id: str, - messages: List[Message], - sampling_params: Optional[SamplingParams] = None, - tools: Optional[List[ToolDefinition]] = None, - tool_choice: Optional[ToolChoice] = ToolChoice.auto, - tool_prompt_format: Optional[ToolPromptFormat] = None, - response_format: Optional[ResponseFormat] = None, - stream: Optional[bool] = False, - logprobs: Optional[LogProbConfig] = None, - tool_config: Optional[ToolConfig] = None, - ) -> Union[ChatCompletionResponse, AsyncIterator[ChatCompletionResponseStreamChunk]]: + messages: list[Message], + sampling_params: SamplingParams | None = None, + tools: list[ToolDefinition] | None = None, + tool_choice: ToolChoice | None = ToolChoice.auto, + tool_prompt_format: ToolPromptFormat | None = None, + response_format: ResponseFormat | None = None, + stream: bool | None = False, + logprobs: LogProbConfig | None = None, + tool_config: ToolConfig | None = None, + ) -> ChatCompletionResponse | AsyncIterator[ChatCompletionResponseStreamChunk]: """Generate a chat completion for the given messages using the specified model. :param model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. - :param messages: List of messages in the conversation - :param sampling_params: Parameters to control the sampling strategy - :param tools: (Optional) List of tool definitions available to the model + :param messages: List of messages in the conversation. + :param sampling_params: Parameters to control the sampling strategy. + :param tools: (Optional) List of tool definitions available to the model. :param tool_choice: (Optional) Whether tool use is required or automatic. Defaults to ToolChoice.auto. .. deprecated:: Use tool_config instead. @@ -773,7 +966,7 @@ class Inference(Protocol): :param logprobs: (Optional) If specified, log probabilities for each token position will be returned. :param tool_config: (Optional) Configuration for tool use. :returns: If stream=False, returns a ChatCompletionResponse with the full completion. - If stream=True, returns an SSE event stream of ChatCompletionResponseStreamChunk + If stream=True, returns an SSE event stream of ChatCompletionResponseStreamChunk. """ ... @@ -781,23 +974,34 @@ class Inference(Protocol): async def batch_chat_completion( self, model_id: str, - messages_batch: List[List[Message]], - sampling_params: Optional[SamplingParams] = None, - tools: Optional[List[ToolDefinition]] = None, - tool_config: Optional[ToolConfig] = None, - response_format: Optional[ResponseFormat] = None, - logprobs: Optional[LogProbConfig] = None, + messages_batch: list[list[Message]], + sampling_params: SamplingParams | None = None, + tools: list[ToolDefinition] | None = None, + tool_config: ToolConfig | None = None, + response_format: ResponseFormat | None = None, + logprobs: LogProbConfig | None = None, ) -> BatchChatCompletionResponse: + """Generate chat completions for a batch of messages using the specified model. + + :param model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. + :param messages_batch: The messages to generate completions for. + :param sampling_params: (Optional) Parameters to control the sampling strategy. + :param tools: (Optional) List of tool definitions available to the model. 
+ :param tool_config: (Optional) Configuration for tool use. + :param response_format: (Optional) Grammar specification for guided (structured) decoding. + :param logprobs: (Optional) If specified, log probabilities for each token position will be returned. + :returns: A BatchChatCompletionResponse with the full completions. + """ raise NotImplementedError("Batch chat completion is not implemented") @webmethod(route="/inference/embeddings", method="POST") async def embeddings( self, model_id: str, - contents: List[str] | List[InterleavedContentItem], - text_truncation: Optional[TextTruncation] = TextTruncation.none, - output_dimension: Optional[int] = None, - task_type: Optional[EmbeddingTaskType] = None, + contents: list[str] | list[InterleavedContentItem], + text_truncation: TextTruncation | None = TextTruncation.none, + output_dimension: int | None = None, + task_type: EmbeddingTaskType | None = None, ) -> EmbeddingsResponse: """Generate embeddings for content pieces using the specified model. @@ -806,7 +1010,7 @@ class Inference(Protocol): :param output_dimension: (Optional) Output dimensionality for the embeddings. Only supported by Matryoshka models. :param text_truncation: (Optional) Config for how to truncate text for embedding when text is longer than the model's max sequence length. :param task_type: (Optional) How is the embedding being used? This is only supported by asymmetric embedding models. - :returns: An array of embeddings, one for each content. Each embedding is a list of floats. The dimensionality of the embedding is model-specific; you can check model metadata using /models/{model_id} + :returns: An array of embeddings, one for each content. Each embedding is a list of floats. The dimensionality of the embedding is model-specific; you can check model metadata using /models/{model_id}. """ ... @@ -815,45 +1019,46 @@ class Inference(Protocol): self, # Standard OpenAI completion parameters model: str, - prompt: Union[str, List[str], List[int], List[List[int]]], - best_of: Optional[int] = None, - echo: Optional[bool] = None, - frequency_penalty: Optional[float] = None, - logit_bias: Optional[Dict[str, float]] = None, - logprobs: Optional[bool] = None, - max_tokens: Optional[int] = None, - n: Optional[int] = None, - presence_penalty: Optional[float] = None, - seed: Optional[int] = None, - stop: Optional[Union[str, List[str]]] = None, - stream: Optional[bool] = None, - stream_options: Optional[Dict[str, Any]] = None, - temperature: Optional[float] = None, - top_p: Optional[float] = None, - user: Optional[str] = None, + prompt: str | list[str] | list[int] | list[list[int]], + best_of: int | None = None, + echo: bool | None = None, + frequency_penalty: float | None = None, + logit_bias: dict[str, float] | None = None, + logprobs: bool | None = None, + max_tokens: int | None = None, + n: int | None = None, + presence_penalty: float | None = None, + seed: int | None = None, + stop: str | list[str] | None = None, + stream: bool | None = None, + stream_options: dict[str, Any] | None = None, + temperature: float | None = None, + top_p: float | None = None, + user: str | None = None, # vLLM-specific parameters - guided_choice: Optional[List[str]] = None, - prompt_logprobs: Optional[int] = None, + guided_choice: list[str] | None = None, + prompt_logprobs: int | None = None, ) -> OpenAICompletion: """Generate an OpenAI-compatible completion for the given prompt using the specified model. :param model: The identifier of the model to use. 
The model must be registered with Llama Stack and available via the /models endpoint. - :param prompt: The prompt to generate a completion for - :param best_of: (Optional) The number of completions to generate - :param echo: (Optional) Whether to echo the prompt - :param frequency_penalty: (Optional) The penalty for repeated tokens - :param logit_bias: (Optional) The logit bias to use - :param logprobs: (Optional) The log probabilities to use - :param max_tokens: (Optional) The maximum number of tokens to generate - :param n: (Optional) The number of completions to generate - :param presence_penalty: (Optional) The penalty for repeated tokens - :param seed: (Optional) The seed to use - :param stop: (Optional) The stop tokens to use - :param stream: (Optional) Whether to stream the response - :param stream_options: (Optional) The stream options to use - :param temperature: (Optional) The temperature to use - :param top_p: (Optional) The top p to use - :param user: (Optional) The user to use + :param prompt: The prompt to generate a completion for. + :param best_of: (Optional) The number of completions to generate. + :param echo: (Optional) Whether to echo the prompt. + :param frequency_penalty: (Optional) The penalty for repeated tokens. + :param logit_bias: (Optional) The logit bias to use. + :param logprobs: (Optional) The log probabilities to use. + :param max_tokens: (Optional) The maximum number of tokens to generate. + :param n: (Optional) The number of completions to generate. + :param presence_penalty: (Optional) The penalty for repeated tokens. + :param seed: (Optional) The seed to use. + :param stop: (Optional) The stop tokens to use. + :param stream: (Optional) Whether to stream the response. + :param stream_options: (Optional) The stream options to use. + :param temperature: (Optional) The temperature to use. + :param top_p: (Optional) The top p to use. + :param user: (Optional) The user to use. + :returns: An OpenAICompletion. """ ... 
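For reference, a minimal sketch of calling the OpenAI-compatible completion endpoint defined above on an object implementing InferenceProvider (illustrative only; `inference_impl` and the model identifier are hypothetical, and only parameters from the signature in this hunk are used):

# Illustrative only -- `inference_impl` is any implementation of InferenceProvider;
# the model id is a placeholder for a model registered with Llama Stack.
async def demo(inference_impl) -> None:
    response = await inference_impl.openai_completion(
        model="example-model",
        prompt="Write a haiku about the sea.",
        max_tokens=64,
        temperature=0.7,
        stream=False,
    )
    # With stream=False the result is an OpenAICompletion; each element of
    # response.choices is an OpenAICompletionChoice carrying the generated text.
    print(response.choices[0].text)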
@@ -861,53 +1066,110 @@ class Inference(Protocol): async def openai_chat_completion( self, model: str, - messages: List[OpenAIMessageParam], - frequency_penalty: Optional[float] = None, - function_call: Optional[Union[str, Dict[str, Any]]] = None, - functions: Optional[List[Dict[str, Any]]] = None, - logit_bias: Optional[Dict[str, float]] = None, - logprobs: Optional[bool] = None, - max_completion_tokens: Optional[int] = None, - max_tokens: Optional[int] = None, - n: Optional[int] = None, - parallel_tool_calls: Optional[bool] = None, - presence_penalty: Optional[float] = None, - response_format: Optional[Dict[str, str]] = None, - seed: Optional[int] = None, - stop: Optional[Union[str, List[str]]] = None, - stream: Optional[bool] = None, - stream_options: Optional[Dict[str, Any]] = None, - temperature: Optional[float] = None, - tool_choice: Optional[Union[str, Dict[str, Any]]] = None, - tools: Optional[List[Dict[str, Any]]] = None, - top_logprobs: Optional[int] = None, - top_p: Optional[float] = None, - user: Optional[str] = None, - ) -> OpenAIChatCompletion: + messages: list[OpenAIMessageParam], + frequency_penalty: float | None = None, + function_call: str | dict[str, Any] | None = None, + functions: list[dict[str, Any]] | None = None, + logit_bias: dict[str, float] | None = None, + logprobs: bool | None = None, + max_completion_tokens: int | None = None, + max_tokens: int | None = None, + n: int | None = None, + parallel_tool_calls: bool | None = None, + presence_penalty: float | None = None, + response_format: OpenAIResponseFormatParam | None = None, + seed: int | None = None, + stop: str | list[str] | None = None, + stream: bool | None = None, + stream_options: dict[str, Any] | None = None, + temperature: float | None = None, + tool_choice: str | dict[str, Any] | None = None, + tools: list[dict[str, Any]] | None = None, + top_logprobs: int | None = None, + top_p: float | None = None, + user: str | None = None, + ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]: """Generate an OpenAI-compatible chat completion for the given messages using the specified model. :param model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. 
- :param messages: List of messages in the conversation - :param frequency_penalty: (Optional) The penalty for repeated tokens - :param function_call: (Optional) The function call to use - :param functions: (Optional) List of functions to use - :param logit_bias: (Optional) The logit bias to use - :param logprobs: (Optional) The log probabilities to use - :param max_completion_tokens: (Optional) The maximum number of tokens to generate - :param max_tokens: (Optional) The maximum number of tokens to generate - :param n: (Optional) The number of completions to generate - :param parallel_tool_calls: (Optional) Whether to parallelize tool calls - :param presence_penalty: (Optional) The penalty for repeated tokens - :param response_format: (Optional) The response format to use - :param seed: (Optional) The seed to use - :param stop: (Optional) The stop tokens to use - :param stream: (Optional) Whether to stream the response - :param stream_options: (Optional) The stream options to use - :param temperature: (Optional) The temperature to use - :param tool_choice: (Optional) The tool choice to use - :param tools: (Optional) The tools to use - :param top_logprobs: (Optional) The top log probabilities to use - :param top_p: (Optional) The top p to use - :param user: (Optional) The user to use + :param messages: List of messages in the conversation. + :param frequency_penalty: (Optional) The penalty for repeated tokens. + :param function_call: (Optional) The function call to use. + :param functions: (Optional) List of functions to use. + :param logit_bias: (Optional) The logit bias to use. + :param logprobs: (Optional) The log probabilities to use. + :param max_completion_tokens: (Optional) The maximum number of tokens to generate. + :param max_tokens: (Optional) The maximum number of tokens to generate. + :param n: (Optional) The number of completions to generate. + :param parallel_tool_calls: (Optional) Whether to parallelize tool calls. + :param presence_penalty: (Optional) The penalty for repeated tokens. + :param response_format: (Optional) The response format to use. + :param seed: (Optional) The seed to use. + :param stop: (Optional) The stop tokens to use. + :param stream: (Optional) Whether to stream the response. + :param stream_options: (Optional) The stream options to use. + :param temperature: (Optional) The temperature to use. + :param tool_choice: (Optional) The tool choice to use. + :param tools: (Optional) The tools to use. + :param top_logprobs: (Optional) The top log probabilities to use. + :param top_p: (Optional) The top p to use. + :param user: (Optional) The user to use. + :returns: An OpenAIChatCompletion. """ ... + + @webmethod(route="/openai/v1/embeddings", method="POST") + async def openai_embeddings( + self, + model: str, + input: str | list[str], + encoding_format: str | None = "float", + dimensions: int | None = None, + user: str | None = None, + ) -> OpenAIEmbeddingsResponse: + """Generate OpenAI-compatible embeddings for the given input using the specified model. + + :param model: The identifier of the model to use. The model must be an embedding model registered with Llama Stack and available via the /models endpoint. + :param input: Input text to embed, encoded as a string or array of strings. To embed multiple inputs in a single request, pass an array of strings. + :param encoding_format: (Optional) The format to return the embeddings in. Can be either "float" or "base64". Defaults to "float". 
+ :param dimensions: (Optional) The number of dimensions the resulting output embeddings should have. Only supported in text-embedding-3 and later models. + :param user: (Optional) A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. + :returns: An OpenAIEmbeddingsResponse containing the embeddings. + """ + ... + + +class Inference(InferenceProvider): + """Llama Stack Inference API for generating completions, chat completions, and embeddings. + + This API provides the raw interface to the underlying models. Two kinds of models are supported: + - LLM models: these models generate "raw" and "chat" (conversational) completions. + - Embedding models: these models generate embeddings to be used for semantic search. + """ + + @webmethod(route="/openai/v1/chat/completions", method="GET") + async def list_chat_completions( + self, + after: str | None = None, + limit: int | None = 20, + model: str | None = None, + order: Order | None = Order.desc, + ) -> ListOpenAIChatCompletionResponse: + """List all chat completions. + + :param after: The ID of the last chat completion to return. + :param limit: The maximum number of chat completions to return. + :param model: The model to filter by. + :param order: The order to sort the chat completions by: "asc" or "desc". Defaults to "desc". + :returns: A ListOpenAIChatCompletionResponse. + """ + raise NotImplementedError("List chat completions is not implemented") + + @webmethod(route="/openai/v1/chat/completions/{completion_id}", method="GET") + async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages: + """Describe a chat completion by its ID. + + :param completion_id: ID of the chat completion. + :returns: A OpenAICompletionWithInputMessages. + """ + raise NotImplementedError("Get chat completion is not implemented") diff --git a/llama_stack/apis/inspect/inspect.py b/llama_stack/apis/inspect/inspect.py index 3896d67a9..44a5e95b2 100644 --- a/llama_stack/apis/inspect/inspect.py +++ b/llama_stack/apis/inspect/inspect.py @@ -4,10 +4,11 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import List, Protocol, runtime_checkable +from typing import Protocol, runtime_checkable from pydantic import BaseModel +from llama_stack.providers.datatypes import HealthStatus from llama_stack.schema_utils import json_schema_type, webmethod @@ -15,13 +16,12 @@ from llama_stack.schema_utils import json_schema_type, webmethod class RouteInfo(BaseModel): route: str method: str - provider_types: List[str] + provider_types: list[str] @json_schema_type class HealthInfo(BaseModel): - status: str - # TODO: add a provider level status + status: HealthStatus @json_schema_type @@ -30,16 +30,31 @@ class VersionInfo(BaseModel): class ListRoutesResponse(BaseModel): - data: List[RouteInfo] + data: list[RouteInfo] @runtime_checkable class Inspect(Protocol): @webmethod(route="/inspect/routes", method="GET") - async def list_routes(self) -> ListRoutesResponse: ... + async def list_routes(self) -> ListRoutesResponse: + """List all routes. + + :returns: A ListRoutesResponse. + """ + ... @webmethod(route="/health", method="GET") - async def health(self) -> HealthInfo: ... + async def health(self) -> HealthInfo: + """Get the health of the service. + + :returns: A HealthInfo. + """ + ... @webmethod(route="/version", method="GET") - async def version(self) -> VersionInfo: ... 
+ async def version(self) -> VersionInfo: + """Get the version of the service. + + :returns: A VersionInfo. + """ + ... diff --git a/llama_stack/apis/models/models.py b/llama_stack/apis/models/models.py index 97398ce75..3d90a92a0 100644 --- a/llama_stack/apis/models/models.py +++ b/llama_stack/apis/models/models.py @@ -5,7 +5,7 @@ # the root directory of this source tree. from enum import Enum -from typing import Any, Dict, List, Literal, Optional, Protocol, runtime_checkable +from typing import Any, Literal, Protocol, runtime_checkable from pydantic import BaseModel, ConfigDict, Field @@ -15,7 +15,7 @@ from llama_stack.schema_utils import json_schema_type, webmethod class CommonModelFields(BaseModel): - metadata: Dict[str, Any] = Field( + metadata: dict[str, Any] = Field( default_factory=dict, description="Any additional metadata for this model", ) @@ -29,14 +29,14 @@ class ModelType(str, Enum): @json_schema_type class Model(CommonModelFields, Resource): - type: Literal[ResourceType.model.value] = ResourceType.model.value + type: Literal[ResourceType.model] = ResourceType.model @property def model_id(self) -> str: return self.identifier @property - def provider_model_id(self) -> str: + def provider_model_id(self) -> str | None: return self.provider_resource_id model_config = ConfigDict(protected_namespaces=()) @@ -46,14 +46,14 @@ class Model(CommonModelFields, Resource): class ModelInput(CommonModelFields): model_id: str - provider_id: Optional[str] = None - provider_model_id: Optional[str] = None - model_type: Optional[ModelType] = ModelType.llm + provider_id: str | None = None + provider_model_id: str | None = None + model_type: ModelType | None = ModelType.llm model_config = ConfigDict(protected_namespaces=()) class ListModelsResponse(BaseModel): - data: List[Model] + data: list[Model] @json_schema_type @@ -73,36 +73,67 @@ class OpenAIModel(BaseModel): class OpenAIListModelsResponse(BaseModel): - data: List[OpenAIModel] + data: list[OpenAIModel] @runtime_checkable @trace_protocol class Models(Protocol): @webmethod(route="/models", method="GET") - async def list_models(self) -> ListModelsResponse: ... + async def list_models(self) -> ListModelsResponse: + """List all models. + + :returns: A ListModelsResponse. + """ + ... @webmethod(route="/openai/v1/models", method="GET") - async def openai_list_models(self) -> OpenAIListModelsResponse: ... + async def openai_list_models(self) -> OpenAIListModelsResponse: + """List models using the OpenAI API. + + :returns: A OpenAIListModelsResponse. + """ + ... @webmethod(route="/models/{model_id:path}", method="GET") async def get_model( self, model_id: str, - ) -> Model: ... + ) -> Model: + """Get a model by its identifier. + + :param model_id: The identifier of the model to get. + :returns: A Model. + """ + ... @webmethod(route="/models", method="POST") async def register_model( self, model_id: str, - provider_model_id: Optional[str] = None, - provider_id: Optional[str] = None, - metadata: Optional[Dict[str, Any]] = None, - model_type: Optional[ModelType] = None, - ) -> Model: ... + provider_model_id: str | None = None, + provider_id: str | None = None, + metadata: dict[str, Any] | None = None, + model_type: ModelType | None = None, + ) -> Model: + """Register a model. + + :param model_id: The identifier of the model to register. + :param provider_model_id: The identifier of the model in the provider. + :param provider_id: The identifier of the provider. + :param metadata: Any additional metadata for this model. 
+ :param model_type: The type of model to register. + :returns: A Model. + """ + ... @webmethod(route="/models/{model_id:path}", method="DELETE") async def unregister_model( self, model_id: str, - ) -> None: ... + ) -> None: + """Unregister a model. + + :param model_id: The identifier of the model to unregister. + """ + ... diff --git a/llama_stack/apis/post_training/post_training.py b/llama_stack/apis/post_training/post_training.py index e5f1bcb65..b196c8a17 100644 --- a/llama_stack/apis/post_training/post_training.py +++ b/llama_stack/apis/post_training/post_training.py @@ -6,10 +6,9 @@ from datetime import datetime from enum import Enum -from typing import Any, Dict, List, Literal, Optional, Protocol, Union +from typing import Annotated, Any, Literal, Protocol from pydantic import BaseModel, Field -from typing_extensions import Annotated from llama_stack.apis.common.content_types import URL from llama_stack.apis.common.job_types import JobStatus @@ -36,9 +35,9 @@ class DataConfig(BaseModel): batch_size: int shuffle: bool data_format: DatasetFormat - validation_dataset_id: Optional[str] = None - packed: Optional[bool] = False - train_on_input: Optional[bool] = False + validation_dataset_id: str | None = None + packed: bool | None = False + train_on_input: bool | None = False @json_schema_type @@ -51,10 +50,10 @@ class OptimizerConfig(BaseModel): @json_schema_type class EfficiencyConfig(BaseModel): - enable_activation_checkpointing: Optional[bool] = False - enable_activation_offloading: Optional[bool] = False - memory_efficient_fsdp_wrap: Optional[bool] = False - fsdp_cpu_offload: Optional[bool] = False + enable_activation_checkpointing: bool | None = False + enable_activation_offloading: bool | None = False + memory_efficient_fsdp_wrap: bool | None = False + fsdp_cpu_offload: bool | None = False @json_schema_type @@ -62,23 +61,23 @@ class TrainingConfig(BaseModel): n_epochs: int max_steps_per_epoch: int = 1 gradient_accumulation_steps: int = 1 - max_validation_steps: Optional[int] = 1 - data_config: Optional[DataConfig] = None - optimizer_config: Optional[OptimizerConfig] = None - efficiency_config: Optional[EfficiencyConfig] = None - dtype: Optional[str] = "bf16" + max_validation_steps: int | None = 1 + data_config: DataConfig | None = None + optimizer_config: OptimizerConfig | None = None + efficiency_config: EfficiencyConfig | None = None + dtype: str | None = "bf16" @json_schema_type class LoraFinetuningConfig(BaseModel): type: Literal["LoRA"] = "LoRA" - lora_attn_modules: List[str] + lora_attn_modules: list[str] apply_lora_to_mlp: bool apply_lora_to_output: bool rank: int alpha: int - use_dora: Optional[bool] = False - quantize_base: Optional[bool] = False + use_dora: bool | None = False + quantize_base: bool | None = False @json_schema_type @@ -88,7 +87,7 @@ class QATFinetuningConfig(BaseModel): group_size: int -AlgorithmConfig = Annotated[Union[LoraFinetuningConfig, QATFinetuningConfig], Field(discriminator="type")] +AlgorithmConfig = Annotated[LoraFinetuningConfig | QATFinetuningConfig, Field(discriminator="type")] register_schema(AlgorithmConfig, name="AlgorithmConfig") @@ -97,7 +96,7 @@ class PostTrainingJobLogStream(BaseModel): """Stream of logs from a finetuning job.""" job_uuid: str - log_lines: List[str] + log_lines: list[str] @json_schema_type @@ -131,8 +130,8 @@ class PostTrainingRLHFRequest(BaseModel): training_config: TrainingConfig # TODO: define these - hyperparam_search_config: Dict[str, Any] - logger_config: Dict[str, Any] + hyperparam_search_config: dict[str, Any] 
+ logger_config: dict[str, Any] class PostTrainingJob(BaseModel): @@ -146,17 +145,17 @@ class PostTrainingJobStatusResponse(BaseModel): job_uuid: str status: JobStatus - scheduled_at: Optional[datetime] = None - started_at: Optional[datetime] = None - completed_at: Optional[datetime] = None + scheduled_at: datetime | None = None + started_at: datetime | None = None + completed_at: datetime | None = None - resources_allocated: Optional[Dict[str, Any]] = None + resources_allocated: dict[str, Any] | None = None - checkpoints: List[Checkpoint] = Field(default_factory=list) + checkpoints: list[Checkpoint] = Field(default_factory=list) class ListPostTrainingJobsResponse(BaseModel): - data: List[PostTrainingJob] + data: list[PostTrainingJob] @json_schema_type @@ -164,7 +163,7 @@ class PostTrainingJobArtifactsResponse(BaseModel): """Artifacts of a finetuning job.""" job_uuid: str - checkpoints: List[Checkpoint] = Field(default_factory=list) + checkpoints: list[Checkpoint] = Field(default_factory=list) # TODO(ashwin): metrics, evals @@ -175,15 +174,27 @@ class PostTraining(Protocol): self, job_uuid: str, training_config: TrainingConfig, - hyperparam_search_config: Dict[str, Any], - logger_config: Dict[str, Any], - model: Optional[str] = Field( + hyperparam_search_config: dict[str, Any], + logger_config: dict[str, Any], + model: str | None = Field( default=None, description="Model descriptor for training if not in provider config`", ), - checkpoint_dir: Optional[str] = None, - algorithm_config: Optional[AlgorithmConfig] = None, - ) -> PostTrainingJob: ... + checkpoint_dir: str | None = None, + algorithm_config: AlgorithmConfig | None = None, + ) -> PostTrainingJob: + """Run supervised fine-tuning of a model. + + :param job_uuid: The UUID of the job to create. + :param training_config: The training configuration. + :param hyperparam_search_config: The hyperparam search configuration. + :param logger_config: The logger configuration. + :param model: The model to fine-tune. + :param checkpoint_dir: The directory to save checkpoint(s) to. + :param algorithm_config: The algorithm configuration. + :returns: A PostTrainingJob. + """ + ... @webmethod(route="/post-training/preference-optimize", method="POST") async def preference_optimize( @@ -192,18 +203,51 @@ class PostTraining(Protocol): finetuned_model: str, algorithm_config: DPOAlignmentConfig, training_config: TrainingConfig, - hyperparam_search_config: Dict[str, Any], - logger_config: Dict[str, Any], - ) -> PostTrainingJob: ... + hyperparam_search_config: dict[str, Any], + logger_config: dict[str, Any], + ) -> PostTrainingJob: + """Run preference optimization of a model. + + :param job_uuid: The UUID of the job to create. + :param finetuned_model: The model to fine-tune. + :param algorithm_config: The algorithm configuration. + :param training_config: The training configuration. + :param hyperparam_search_config: The hyperparam search configuration. + :param logger_config: The logger configuration. + :returns: A PostTrainingJob. + """ + ... @webmethod(route="/post-training/jobs", method="GET") - async def get_training_jobs(self) -> ListPostTrainingJobsResponse: ... + async def get_training_jobs(self) -> ListPostTrainingJobsResponse: + """Get all training jobs. + + :returns: A ListPostTrainingJobsResponse. + """ + ... @webmethod(route="/post-training/job/status", method="GET") - async def get_training_job_status(self, job_uuid: str) -> PostTrainingJobStatusResponse: ... 
+ async def get_training_job_status(self, job_uuid: str) -> PostTrainingJobStatusResponse: + """Get the status of a training job. + + :param job_uuid: The UUID of the job to get the status of. + :returns: A PostTrainingJobStatusResponse. + """ + ... @webmethod(route="/post-training/job/cancel", method="POST") - async def cancel_training_job(self, job_uuid: str) -> None: ... + async def cancel_training_job(self, job_uuid: str) -> None: + """Cancel a training job. + + :param job_uuid: The UUID of the job to cancel. + """ + ... @webmethod(route="/post-training/job/artifacts", method="GET") - async def get_training_job_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse: ... + async def get_training_job_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse: + """Get the artifacts of a training job. + + :param job_uuid: The UUID of the job to get the artifacts of. + :returns: A PostTrainingJobArtifactsResponse. + """ + ... diff --git a/llama_stack/apis/providers/providers.py b/llama_stack/apis/providers/providers.py index 83d03d7c1..4bc977bf1 100644 --- a/llama_stack/apis/providers/providers.py +++ b/llama_stack/apis/providers/providers.py @@ -4,10 +4,11 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict, List, Protocol, runtime_checkable +from typing import Any, Protocol, runtime_checkable from pydantic import BaseModel +from llama_stack.providers.datatypes import HealthResponse from llama_stack.schema_utils import json_schema_type, webmethod @@ -16,11 +17,12 @@ class ProviderInfo(BaseModel): api: str provider_id: str provider_type: str - config: Dict[str, Any] + config: dict[str, Any] + health: HealthResponse class ListProvidersResponse(BaseModel): - data: List[ProviderInfo] + data: list[ProviderInfo] @runtime_checkable @@ -30,7 +32,18 @@ class Providers(Protocol): """ @webmethod(route="/providers", method="GET") - async def list_providers(self) -> ListProvidersResponse: ... + async def list_providers(self) -> ListProvidersResponse: + """List all available providers. + + :returns: A ListProvidersResponse containing information about all providers. + """ + ... @webmethod(route="/providers/{provider_id}", method="GET") - async def inspect_provider(self, provider_id: str) -> ProviderInfo: ... + async def inspect_provider(self, provider_id: str) -> ProviderInfo: + """Get detailed information about a specific provider. + + :param provider_id: The ID of the provider to inspect. + :returns: A ProviderInfo object containing the provider's details. + """ + ... diff --git a/llama_stack/apis/resource.py b/llama_stack/apis/resource.py index 70ec63c55..175baa7b9 100644 --- a/llama_stack/apis/resource.py +++ b/llama_stack/apis/resource.py @@ -4,12 +4,23 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
+import sys from enum import Enum from pydantic import BaseModel, Field +# TODO: use enum.StrEnum when we drop support for python 3.10 +if sys.version_info >= (3, 11): + from enum import StrEnum +else: -class ResourceType(Enum): + class StrEnum(str, Enum): + """Backport of StrEnum for Python 3.10 and below.""" + + pass + + +class ResourceType(StrEnum): model = "model" shield = "shield" vector_db = "vector_db" @@ -25,9 +36,9 @@ class Resource(BaseModel): identifier: str = Field(description="Unique identifier for this resource in llama stack") - provider_resource_id: str = Field( - description="Unique identifier for this resource in the provider", + provider_resource_id: str | None = Field( default=None, + description="Unique identifier for this resource in the provider", ) provider_id: str = Field(description="ID of the provider that owns this resource") diff --git a/llama_stack/apis/safety/safety.py b/llama_stack/apis/safety/safety.py index fd2f0292c..3aee52b7e 100644 --- a/llama_stack/apis/safety/safety.py +++ b/llama_stack/apis/safety/safety.py @@ -5,7 +5,7 @@ # the root directory of this source tree. from enum import Enum -from typing import Any, Dict, List, Optional, Protocol, runtime_checkable +from typing import Any, Protocol, runtime_checkable from pydantic import BaseModel, Field @@ -27,16 +27,16 @@ class SafetyViolation(BaseModel): violation_level: ViolationLevel # what message should you convey to the user - user_message: Optional[str] = None + user_message: str | None = None # additional metadata (including specific violation codes) more for # debugging, telemetry - metadata: Dict[str, Any] = Field(default_factory=dict) + metadata: dict[str, Any] = Field(default_factory=dict) @json_schema_type class RunShieldResponse(BaseModel): - violation: Optional[SafetyViolation] = None + violation: SafetyViolation | None = None class ShieldStore(Protocol): @@ -52,6 +52,14 @@ class Safety(Protocol): async def run_shield( self, shield_id: str, - messages: List[Message], - params: Dict[str, Any] = None, - ) -> RunShieldResponse: ... + messages: list[Message], + params: dict[str, Any], + ) -> RunShieldResponse: + """Run a shield. + + :param shield_id: The identifier of the shield to run. + :param messages: The messages to run the shield on. + :param params: The parameters of the shield. + :returns: A RunShieldResponse. + """ + ... diff --git a/llama_stack/apis/scoring/scoring.py b/llama_stack/apis/scoring/scoring.py index 54a9ac2aa..732e80e79 100644 --- a/llama_stack/apis/scoring/scoring.py +++ b/llama_stack/apis/scoring/scoring.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from typing import Any, Dict, List, Optional, Protocol, runtime_checkable +from typing import Any, Protocol, runtime_checkable from pydantic import BaseModel @@ -12,7 +12,7 @@ from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnParams from llama_stack.schema_utils import json_schema_type, webmethod # mapping of metric to value -ScoringResultRow = Dict[str, Any] +ScoringResultRow = dict[str, Any] @json_schema_type @@ -24,15 +24,15 @@ class ScoringResult(BaseModel): :param aggregated_results: Map of metric name to aggregated value """ - score_rows: List[ScoringResultRow] + score_rows: list[ScoringResultRow] # aggregated metrics to value - aggregated_results: Dict[str, Any] + aggregated_results: dict[str, Any] @json_schema_type class ScoreBatchResponse(BaseModel): - dataset_id: Optional[str] = None - results: Dict[str, ScoringResult] + dataset_id: str | None = None + results: dict[str, ScoringResult] @json_schema_type @@ -44,7 +44,7 @@ class ScoreResponse(BaseModel): """ # each key in the dict is a scoring function name - results: Dict[str, ScoringResult] + results: dict[str, ScoringResult] class ScoringFunctionStore(Protocol): @@ -59,20 +59,28 @@ class Scoring(Protocol): async def score_batch( self, dataset_id: str, - scoring_functions: Dict[str, Optional[ScoringFnParams]], + scoring_functions: dict[str, ScoringFnParams | None], save_results_dataset: bool = False, - ) -> ScoreBatchResponse: ... + ) -> ScoreBatchResponse: + """Score a batch of rows. + + :param dataset_id: The ID of the dataset to score. + :param scoring_functions: The scoring functions to use for the scoring. + :param save_results_dataset: Whether to save the results to a dataset. + :returns: A ScoreBatchResponse. + """ + ... @webmethod(route="/scoring/score", method="POST") async def score( self, - input_rows: List[Dict[str, Any]], - scoring_functions: Dict[str, Optional[ScoringFnParams]], + input_rows: list[dict[str, Any]], + scoring_functions: dict[str, ScoringFnParams | None], ) -> ScoreResponse: """Score a list of rows. :param input_rows: The rows to score. :param scoring_functions: The scoring functions to use for the scoring. - :return: ScoreResponse object containing rows and aggregated results + :returns: A ScoreResponse object containing rows and aggregated results. """ ... diff --git a/llama_stack/apis/scoring_functions/scoring_functions.py b/llama_stack/apis/scoring_functions/scoring_functions.py index 4f85947dd..9cd21b7d1 100644 --- a/llama_stack/apis/scoring_functions/scoring_functions.py +++ b/llama_stack/apis/scoring_functions/scoring_functions.py @@ -4,37 +4,44 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +# TODO: use enum.StrEnum when we drop support for python 3.10 +import sys from enum import Enum from typing import ( + Annotated, Any, - Dict, - List, Literal, - Optional, Protocol, - Union, runtime_checkable, ) from pydantic import BaseModel, Field -from typing_extensions import Annotated from llama_stack.apis.common.type_system import ParamType from llama_stack.apis.resource import Resource, ResourceType from llama_stack.schema_utils import json_schema_type, register_schema, webmethod +if sys.version_info >= (3, 11): + from enum import StrEnum +else: + + class StrEnum(str, Enum): + """Backport of StrEnum for Python 3.10 and below.""" + + pass + # Perhaps more structure can be imposed on these functions. Maybe they could be associated # with standard metrics so they can be rolled up? 
@json_schema_type -class ScoringFnParamsType(Enum): +class ScoringFnParamsType(StrEnum): llm_as_judge = "llm_as_judge" regex_parser = "regex_parser" basic = "basic" @json_schema_type -class AggregationFunctionType(Enum): +class AggregationFunctionType(StrEnum): average = "average" weighted_average = "weighted_average" median = "median" @@ -44,62 +51,58 @@ class AggregationFunctionType(Enum): @json_schema_type class LLMAsJudgeScoringFnParams(BaseModel): - type: Literal[ScoringFnParamsType.llm_as_judge.value] = ScoringFnParamsType.llm_as_judge.value + type: Literal[ScoringFnParamsType.llm_as_judge] = ScoringFnParamsType.llm_as_judge judge_model: str - prompt_template: Optional[str] = None - judge_score_regexes: Optional[List[str]] = Field( + prompt_template: str | None = None + judge_score_regexes: list[str] = Field( description="Regexes to extract the answer from generated response", - default_factory=list, + default_factory=lambda: [], ) - aggregation_functions: Optional[List[AggregationFunctionType]] = Field( + aggregation_functions: list[AggregationFunctionType] = Field( description="Aggregation functions to apply to the scores of each row", - default_factory=list, + default_factory=lambda: [], ) @json_schema_type class RegexParserScoringFnParams(BaseModel): - type: Literal[ScoringFnParamsType.regex_parser.value] = ScoringFnParamsType.regex_parser.value - parsing_regexes: Optional[List[str]] = Field( + type: Literal[ScoringFnParamsType.regex_parser] = ScoringFnParamsType.regex_parser + parsing_regexes: list[str] = Field( description="Regex to extract the answer from generated response", - default_factory=list, + default_factory=lambda: [], ) - aggregation_functions: Optional[List[AggregationFunctionType]] = Field( + aggregation_functions: list[AggregationFunctionType] = Field( description="Aggregation functions to apply to the scores of each row", - default_factory=list, + default_factory=lambda: [], ) @json_schema_type class BasicScoringFnParams(BaseModel): - type: Literal[ScoringFnParamsType.basic.value] = ScoringFnParamsType.basic.value - aggregation_functions: Optional[List[AggregationFunctionType]] = Field( + type: Literal[ScoringFnParamsType.basic] = ScoringFnParamsType.basic + aggregation_functions: list[AggregationFunctionType] = Field( description="Aggregation functions to apply to the scores of each row", default_factory=list, ) ScoringFnParams = Annotated[ - Union[ - LLMAsJudgeScoringFnParams, - RegexParserScoringFnParams, - BasicScoringFnParams, - ], + LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams, Field(discriminator="type"), ] register_schema(ScoringFnParams, name="ScoringFnParams") class CommonScoringFnFields(BaseModel): - description: Optional[str] = None - metadata: Dict[str, Any] = Field( + description: str | None = None + metadata: dict[str, Any] = Field( default_factory=dict, description="Any additional metadata for this definition", ) return_type: ParamType = Field( description="The return type of the deterministic function", ) - params: Optional[ScoringFnParams] = Field( + params: ScoringFnParams | None = Field( description="The parameters for the scoring function for benchmark eval, these can be overridden for app eval", default=None, ) @@ -107,34 +110,45 @@ class CommonScoringFnFields(BaseModel): @json_schema_type class ScoringFn(CommonScoringFnFields, Resource): - type: Literal[ResourceType.scoring_function.value] = ResourceType.scoring_function.value + type: Literal[ResourceType.scoring_function] = ResourceType.scoring_function 
@property def scoring_fn_id(self) -> str: return self.identifier @property - def provider_scoring_fn_id(self) -> str: + def provider_scoring_fn_id(self) -> str | None: return self.provider_resource_id class ScoringFnInput(CommonScoringFnFields, BaseModel): scoring_fn_id: str - provider_id: Optional[str] = None - provider_scoring_fn_id: Optional[str] = None + provider_id: str | None = None + provider_scoring_fn_id: str | None = None class ListScoringFunctionsResponse(BaseModel): - data: List[ScoringFn] + data: list[ScoringFn] @runtime_checkable class ScoringFunctions(Protocol): @webmethod(route="/scoring-functions", method="GET") - async def list_scoring_functions(self) -> ListScoringFunctionsResponse: ... + async def list_scoring_functions(self) -> ListScoringFunctionsResponse: + """List all scoring functions. + + :returns: A ListScoringFunctionsResponse. + """ + ... @webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="GET") - async def get_scoring_function(self, scoring_fn_id: str, /) -> ScoringFn: ... + async def get_scoring_function(self, scoring_fn_id: str, /) -> ScoringFn: + """Get a scoring function by its ID. + + :param scoring_fn_id: The ID of the scoring function to get. + :returns: A ScoringFn. + """ + ... @webmethod(route="/scoring-functions", method="POST") async def register_scoring_function( @@ -142,7 +156,17 @@ class ScoringFunctions(Protocol): scoring_fn_id: str, description: str, return_type: ParamType, - provider_scoring_fn_id: Optional[str] = None, - provider_id: Optional[str] = None, - params: Optional[ScoringFnParams] = None, - ) -> None: ... + provider_scoring_fn_id: str | None = None, + provider_id: str | None = None, + params: ScoringFnParams | None = None, + ) -> None: + """Register a scoring function. + + :param scoring_fn_id: The ID of the scoring function to register. + :param description: The description of the scoring function. + :param return_type: The return type of the scoring function. + :param provider_scoring_fn_id: The ID of the provider scoring function to use for the scoring function. + :param provider_id: The ID of the provider to use for the scoring function. + :param params: The parameters for the scoring function for benchmark eval, these can be overridden for app eval. + """ + ... diff --git a/llama_stack/apis/shields/shields.py b/llama_stack/apis/shields/shields.py index 67f3bd27b..ce1f73d8e 100644 --- a/llama_stack/apis/shields/shields.py +++ b/llama_stack/apis/shields/shields.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from typing import Any, Dict, List, Literal, Optional, Protocol, runtime_checkable +from typing import Any, Literal, Protocol, runtime_checkable from pydantic import BaseModel @@ -14,48 +14,68 @@ from llama_stack.schema_utils import json_schema_type, webmethod class CommonShieldFields(BaseModel): - params: Optional[Dict[str, Any]] = None + params: dict[str, Any] | None = None @json_schema_type class Shield(CommonShieldFields, Resource): """A safety shield resource that can be used to check content""" - type: Literal[ResourceType.shield.value] = ResourceType.shield.value + type: Literal[ResourceType.shield] = ResourceType.shield @property def shield_id(self) -> str: return self.identifier @property - def provider_shield_id(self) -> str: + def provider_shield_id(self) -> str | None: return self.provider_resource_id class ShieldInput(CommonShieldFields): shield_id: str - provider_id: Optional[str] = None - provider_shield_id: Optional[str] = None + provider_id: str | None = None + provider_shield_id: str | None = None class ListShieldsResponse(BaseModel): - data: List[Shield] + data: list[Shield] @runtime_checkable @trace_protocol class Shields(Protocol): @webmethod(route="/shields", method="GET") - async def list_shields(self) -> ListShieldsResponse: ... + async def list_shields(self) -> ListShieldsResponse: + """List all shields. + + :returns: A ListShieldsResponse. + """ + ... @webmethod(route="/shields/{identifier:path}", method="GET") - async def get_shield(self, identifier: str) -> Shield: ... + async def get_shield(self, identifier: str) -> Shield: + """Get a shield by its identifier. + + :param identifier: The identifier of the shield to get. + :returns: A Shield. + """ + ... @webmethod(route="/shields", method="POST") async def register_shield( self, shield_id: str, - provider_shield_id: Optional[str] = None, - provider_id: Optional[str] = None, - params: Optional[Dict[str, Any]] = None, - ) -> Shield: ... + provider_shield_id: str | None = None, + provider_id: str | None = None, + params: dict[str, Any] | None = None, + ) -> Shield: + """Register a shield. + + :param shield_id: The identifier of the shield to register. + :param provider_shield_id: The identifier of the shield in the provider. + :param provider_id: The identifier of the provider. + :param params: The parameters of the shield. + :returns: A Shield. + """ + ... diff --git a/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py b/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py index 7b41192af..91e550da9 100644 --- a/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +++ b/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py @@ -5,7 +5,7 @@ # the root directory of this source tree. from enum import Enum -from typing import Any, Dict, List, Optional, Protocol, Union +from typing import Any, Protocol from pydantic import BaseModel @@ -28,24 +28,24 @@ class FilteringFunction(Enum): class SyntheticDataGenerationRequest(BaseModel): """Request to generate synthetic data. A small batch of prompts and a filtering function""" - dialogs: List[Message] + dialogs: list[Message] filtering_function: FilteringFunction = FilteringFunction.none - model: Optional[str] = None + model: str | None = None @json_schema_type class SyntheticDataGenerationResponse(BaseModel): """Response from the synthetic data generation. 
Batch of (prompt, response, score) tuples that pass the threshold.""" - synthetic_data: List[Dict[str, Any]] - statistics: Optional[Dict[str, Any]] = None + synthetic_data: list[dict[str, Any]] + statistics: dict[str, Any] | None = None class SyntheticDataGeneration(Protocol): @webmethod(route="/synthetic-data-generation/generate") def synthetic_data_generate( self, - dialogs: List[Message], + dialogs: list[Message], filtering_function: FilteringFunction = FilteringFunction.none, - model: Optional[str] = None, - ) -> Union[SyntheticDataGenerationResponse]: ... + model: str | None = None, + ) -> SyntheticDataGenerationResponse: ... diff --git a/llama_stack/apis/telemetry/telemetry.py b/llama_stack/apis/telemetry/telemetry.py index d57c311b2..0eb53f397 100644 --- a/llama_stack/apis/telemetry/telemetry.py +++ b/llama_stack/apis/telemetry/telemetry.py @@ -7,18 +7,14 @@ from datetime import datetime from enum import Enum from typing import ( + Annotated, Any, - Dict, - List, Literal, - Optional, Protocol, - Union, runtime_checkable, ) from pydantic import BaseModel, Field -from typing_extensions import Annotated from llama_stack.models.llama.datatypes import Primitive from llama_stack.schema_utils import json_schema_type, register_schema, webmethod @@ -37,11 +33,11 @@ class SpanStatus(Enum): class Span(BaseModel): span_id: str trace_id: str - parent_span_id: Optional[str] = None + parent_span_id: str | None = None name: str start_time: datetime - end_time: Optional[datetime] = None - attributes: Optional[Dict[str, Any]] = Field(default_factory=dict) + end_time: datetime | None = None + attributes: dict[str, Any] | None = Field(default_factory=lambda: {}) def set_attribute(self, key: str, value: Any): if self.attributes is None: @@ -54,7 +50,7 @@ class Trace(BaseModel): trace_id: str root_span_id: str start_time: datetime - end_time: Optional[datetime] = None + end_time: datetime | None = None @json_schema_type @@ -78,29 +74,29 @@ class EventCommon(BaseModel): trace_id: str span_id: str timestamp: datetime - attributes: Optional[Dict[str, Primitive]] = Field(default_factory=dict) + attributes: dict[str, Primitive] | None = Field(default_factory=lambda: {}) @json_schema_type class UnstructuredLogEvent(EventCommon): - type: Literal[EventType.UNSTRUCTURED_LOG.value] = EventType.UNSTRUCTURED_LOG.value + type: Literal[EventType.UNSTRUCTURED_LOG] = EventType.UNSTRUCTURED_LOG message: str severity: LogSeverity @json_schema_type class MetricEvent(EventCommon): - type: Literal[EventType.METRIC.value] = EventType.METRIC.value + type: Literal[EventType.METRIC] = EventType.METRIC metric: str # this would be an enum - value: Union[int, float] + value: int | float unit: str @json_schema_type class MetricInResponse(BaseModel): metric: str - value: Union[int, float] - unit: Optional[str] = None + value: int | float + unit: str | None = None # This is a short term solution to allow inference API to return metrics @@ -124,7 +120,7 @@ class MetricInResponse(BaseModel): class MetricResponseMixin(BaseModel): - metrics: Optional[List[MetricInResponse]] = None + metrics: list[MetricInResponse] | None = None @json_schema_type @@ -135,22 +131,19 @@ class StructuredLogType(Enum): @json_schema_type class SpanStartPayload(BaseModel): - type: Literal[StructuredLogType.SPAN_START.value] = StructuredLogType.SPAN_START.value + type: Literal[StructuredLogType.SPAN_START] = StructuredLogType.SPAN_START name: str - parent_span_id: Optional[str] = None + parent_span_id: str | None = None @json_schema_type class 
SpanEndPayload(BaseModel): - type: Literal[StructuredLogType.SPAN_END.value] = StructuredLogType.SPAN_END.value + type: Literal[StructuredLogType.SPAN_END] = StructuredLogType.SPAN_END status: SpanStatus StructuredLogPayload = Annotated[ - Union[ - SpanStartPayload, - SpanEndPayload, - ], + SpanStartPayload | SpanEndPayload, Field(discriminator="type"), ] register_schema(StructuredLogPayload, name="StructuredLogPayload") @@ -158,16 +151,12 @@ register_schema(StructuredLogPayload, name="StructuredLogPayload") @json_schema_type class StructuredLogEvent(EventCommon): - type: Literal[EventType.STRUCTURED_LOG.value] = EventType.STRUCTURED_LOG.value + type: Literal[EventType.STRUCTURED_LOG] = EventType.STRUCTURED_LOG payload: StructuredLogPayload Event = Annotated[ - Union[ - UnstructuredLogEvent, - MetricEvent, - StructuredLogEvent, - ], + UnstructuredLogEvent | MetricEvent | StructuredLogEvent, Field(discriminator="type"), ] register_schema(Event, name="Event") @@ -184,7 +173,7 @@ class EvalTrace(BaseModel): @json_schema_type class SpanWithStatus(Span): - status: Optional[SpanStatus] = None + status: SpanStatus | None = None @json_schema_type @@ -203,58 +192,177 @@ class QueryCondition(BaseModel): class QueryTracesResponse(BaseModel): - data: List[Trace] + data: list[Trace] class QuerySpansResponse(BaseModel): - data: List[Span] + data: list[Span] class QuerySpanTreeResponse(BaseModel): - data: Dict[str, SpanWithStatus] + data: dict[str, SpanWithStatus] + + +class MetricQueryType(Enum): + RANGE = "range" + INSTANT = "instant" + + +class MetricLabelOperator(Enum): + EQUALS = "=" + NOT_EQUALS = "!=" + REGEX_MATCH = "=~" + REGEX_NOT_MATCH = "!~" + + +class MetricLabelMatcher(BaseModel): + name: str + value: str + operator: MetricLabelOperator = MetricLabelOperator.EQUALS + + +@json_schema_type +class MetricLabel(BaseModel): + name: str + value: str + + +@json_schema_type +class MetricDataPoint(BaseModel): + timestamp: int + value: float + + +@json_schema_type +class MetricSeries(BaseModel): + metric: str + labels: list[MetricLabel] + values: list[MetricDataPoint] + + +class QueryMetricsResponse(BaseModel): + data: list[MetricSeries] @runtime_checkable class Telemetry(Protocol): @webmethod(route="/telemetry/events", method="POST") - async def log_event(self, event: Event, ttl_seconds: int = DEFAULT_TTL_DAYS * 86400) -> None: ... + async def log_event( + self, + event: Event, + ttl_seconds: int = DEFAULT_TTL_DAYS * 86400, + ) -> None: + """Log an event. + + :param event: The event to log. + :param ttl_seconds: The time to live of the event. + """ + ... @webmethod(route="/telemetry/traces", method="POST") async def query_traces( self, - attribute_filters: Optional[List[QueryCondition]] = None, - limit: Optional[int] = 100, - offset: Optional[int] = 0, - order_by: Optional[List[str]] = None, - ) -> QueryTracesResponse: ... + attribute_filters: list[QueryCondition] | None = None, + limit: int | None = 100, + offset: int | None = 0, + order_by: list[str] | None = None, + ) -> QueryTracesResponse: + """Query traces. + + :param attribute_filters: The attribute filters to apply to the traces. + :param limit: The limit of traces to return. + :param offset: The offset of the traces to return. + :param order_by: The order by of the traces to return. + :returns: A QueryTracesResponse. + """ + ... @webmethod(route="/telemetry/traces/{trace_id:path}", method="GET") - async def get_trace(self, trace_id: str) -> Trace: ... + async def get_trace(self, trace_id: str) -> Trace: + """Get a trace by its ID. 
+ + :param trace_id: The ID of the trace to get. + :returns: A Trace. + """ + ... @webmethod(route="/telemetry/traces/{trace_id:path}/spans/{span_id:path}", method="GET") - async def get_span(self, trace_id: str, span_id: str) -> Span: ... + async def get_span(self, trace_id: str, span_id: str) -> Span: + """Get a span by its ID. + + :param trace_id: The ID of the trace to get the span from. + :param span_id: The ID of the span to get. + :returns: A Span. + """ + ... @webmethod(route="/telemetry/spans/{span_id:path}/tree", method="POST") async def get_span_tree( self, span_id: str, - attributes_to_return: Optional[List[str]] = None, - max_depth: Optional[int] = None, - ) -> QuerySpanTreeResponse: ... + attributes_to_return: list[str] | None = None, + max_depth: int | None = None, + ) -> QuerySpanTreeResponse: + """Get a span tree by its ID. + + :param span_id: The ID of the span to get the tree from. + :param attributes_to_return: The attributes to return in the tree. + :param max_depth: The maximum depth of the tree. + :returns: A QuerySpanTreeResponse. + """ + ... @webmethod(route="/telemetry/spans", method="POST") async def query_spans( self, - attribute_filters: List[QueryCondition], - attributes_to_return: List[str], - max_depth: Optional[int] = None, - ) -> QuerySpansResponse: ... + attribute_filters: list[QueryCondition], + attributes_to_return: list[str], + max_depth: int | None = None, + ) -> QuerySpansResponse: + """Query spans. + + :param attribute_filters: The attribute filters to apply to the spans. + :param attributes_to_return: The attributes to return in the spans. + :param max_depth: The maximum depth of the tree. + :returns: A QuerySpansResponse. + """ + ... @webmethod(route="/telemetry/spans/export", method="POST") async def save_spans_to_dataset( self, - attribute_filters: List[QueryCondition], - attributes_to_save: List[str], + attribute_filters: list[QueryCondition], + attributes_to_save: list[str], dataset_id: str, - max_depth: Optional[int] = None, - ) -> None: ... + max_depth: int | None = None, + ) -> None: + """Save spans to a dataset. + + :param attribute_filters: The attribute filters to apply to the spans. + :param attributes_to_save: The attributes to save to the dataset. + :param dataset_id: The ID of the dataset to save the spans to. + :param max_depth: The maximum depth of the tree. + """ + ... + + @webmethod(route="/telemetry/metrics/{metric_name}", method="POST") + async def query_metrics( + self, + metric_name: str, + start_time: int, + end_time: int | None = None, + granularity: str | None = "1d", + query_type: MetricQueryType = MetricQueryType.RANGE, + label_matchers: list[MetricLabelMatcher] | None = None, + ) -> QueryMetricsResponse: + """Query metrics. + + :param metric_name: The name of the metric to query. + :param start_time: The start time of the metric to query. + :param end_time: The end time of the metric to query. + :param granularity: The granularity of the metric to query. + :param query_type: The type of query to perform. + :param label_matchers: The label matchers to apply to the metric. + :returns: A QueryMetricsResponse. + """ + ... diff --git a/llama_stack/apis/tools/rag_tool.py b/llama_stack/apis/tools/rag_tool.py index 73b36e050..1e3542f74 100644 --- a/llama_stack/apis/tools/rag_tool.py +++ b/llama_stack/apis/tools/rag_tool.py @@ -5,10 +5,10 @@ # the root directory of this source tree. 
from enum import Enum -from typing import Any, Dict, List, Literal, Optional, Union +from typing import Annotated, Any, Literal -from pydantic import BaseModel, Field -from typing_extensions import Annotated, Protocol, runtime_checkable +from pydantic import BaseModel, Field, field_validator +from typing_extensions import Protocol, runtime_checkable from llama_stack.apis.common.content_types import URL, InterleavedContent from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol @@ -29,13 +29,13 @@ class RAGDocument(BaseModel): document_id: str content: InterleavedContent | URL mime_type: str | None = None - metadata: Dict[str, Any] = Field(default_factory=dict) + metadata: dict[str, Any] = Field(default_factory=dict) @json_schema_type class RAGQueryResult(BaseModel): - content: Optional[InterleavedContent] = None - metadata: Dict[str, Any] = Field(default_factory=dict) + content: InterleavedContent | None = None + metadata: dict[str, Any] = Field(default_factory=dict) @json_schema_type @@ -59,10 +59,7 @@ class LLMRAGQueryGeneratorConfig(BaseModel): RAGQueryGeneratorConfig = Annotated[ - Union[ - DefaultRAGQueryGeneratorConfig, - LLMRAGQueryGeneratorConfig, - ], + DefaultRAGQueryGeneratorConfig | LLMRAGQueryGeneratorConfig, Field(discriminator="type"), ] register_schema(RAGQueryGeneratorConfig, name="RAGQueryGeneratorConfig") @@ -70,11 +67,35 @@ register_schema(RAGQueryGeneratorConfig, name="RAGQueryGeneratorConfig") @json_schema_type class RAGQueryConfig(BaseModel): + """ + Configuration for the RAG query generation. + + :param query_generator_config: Configuration for the query generator. + :param max_tokens_in_context: Maximum number of tokens in the context. + :param max_chunks: Maximum number of chunks to retrieve. + :param chunk_template: Template for formatting each retrieved chunk in the context. + Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk content string), {metadata} (chunk metadata dict). + Default: "Result {index}\\nContent: {chunk.content}\\nMetadata: {metadata}\\n" + :param mode: Search mode for retrieval—either "vector" or "keyword". Default "vector". + """ + # This config defines how a query is generated using the messages # for memory bank retrieval. query_generator_config: RAGQueryGeneratorConfig = Field(default=DefaultRAGQueryGeneratorConfig()) max_tokens_in_context: int = 4096 max_chunks: int = 5 + chunk_template: str = "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n" + mode: str | None = None + + @field_validator("chunk_template") + def validate_chunk_template(cls, v: str) -> str: + if "{chunk.content}" not in v: + raise ValueError("chunk_template must contain {chunk.content}") + if "{index}" not in v: + raise ValueError("chunk_template must contain {index}") + if len(v) == 0: + raise ValueError("chunk_template must not be empty") + return v @runtime_checkable @@ -83,7 +104,7 @@ class RAGToolRuntime(Protocol): @webmethod(route="/tool-runtime/rag-tool/insert", method="POST") async def insert( self, - documents: List[RAGDocument], + documents: list[RAGDocument], vector_db_id: str, chunk_size_in_tokens: int = 512, ) -> None: @@ -94,8 +115,8 @@ class RAGToolRuntime(Protocol): async def query( self, content: InterleavedContent, - vector_db_ids: List[str], - query_config: Optional[RAGQueryConfig] = None, + vector_db_ids: list[str], + query_config: RAGQueryConfig | None = None, ) -> RAGQueryResult: """Query the RAG system for context; typically invoked by the agent""" ... 
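Note: the RAGQueryConfig additions above (chunk_template, mode, and the chunk_template validator) are easiest to see with a small usage sketch. Illustrative only, not part of this diff; the import path simply follows the file being modified, and the template strings are made up:

from llama_stack.apis.tools.rag_tool import RAGQueryConfig

# A valid config: the template keeps the required {index} and {chunk.content} placeholders.
config = RAGQueryConfig(
    max_chunks=3,
    mode="keyword",
    chunk_template="[{index}] {chunk.content}\nMetadata: {metadata}\n",
)

# A template missing {chunk.content} is rejected by validate_chunk_template; pydantic
# surfaces the ValueError as a ValidationError at construction time.
try:
    RAGQueryConfig(chunk_template="Result {index}\n")
except ValueError as err:
    print(f"rejected: {err}")

Requiring both placeholders up front keeps a misconfigured template from silently dropping retrieved content or its ordering.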
diff --git a/llama_stack/apis/tools/tools.py b/llama_stack/apis/tools/tools.py index 4ca72f71d..0c8d47edf 100644 --- a/llama_stack/apis/tools/tools.py +++ b/llama_stack/apis/tools/tools.py @@ -5,7 +5,7 @@ # the root directory of this source tree. from enum import Enum -from typing import Any, Dict, List, Literal, Optional +from typing import Any, Literal from pydantic import BaseModel, Field from typing_extensions import Protocol, runtime_checkable @@ -24,68 +24,60 @@ class ToolParameter(BaseModel): parameter_type: str description: str required: bool = Field(default=True) - default: Optional[Any] = None - - -@json_schema_type -class ToolHost(Enum): - distribution = "distribution" - client = "client" - model_context_protocol = "model_context_protocol" + default: Any | None = None @json_schema_type class Tool(Resource): - type: Literal[ResourceType.tool.value] = ResourceType.tool.value + type: Literal[ResourceType.tool] = ResourceType.tool toolgroup_id: str - tool_host: ToolHost description: str - parameters: List[ToolParameter] - metadata: Optional[Dict[str, Any]] = None + parameters: list[ToolParameter] + metadata: dict[str, Any] | None = None @json_schema_type class ToolDef(BaseModel): name: str - description: Optional[str] = None - parameters: Optional[List[ToolParameter]] = None - metadata: Optional[Dict[str, Any]] = None + description: str | None = None + parameters: list[ToolParameter] | None = None + metadata: dict[str, Any] | None = None @json_schema_type class ToolGroupInput(BaseModel): toolgroup_id: str provider_id: str - args: Optional[Dict[str, Any]] = None - mcp_endpoint: Optional[URL] = None + args: dict[str, Any] | None = None + mcp_endpoint: URL | None = None @json_schema_type class ToolGroup(Resource): - type: Literal[ResourceType.tool_group.value] = ResourceType.tool_group.value - mcp_endpoint: Optional[URL] = None - args: Optional[Dict[str, Any]] = None + type: Literal[ResourceType.tool_group] = ResourceType.tool_group + mcp_endpoint: URL | None = None + args: dict[str, Any] | None = None @json_schema_type class ToolInvocationResult(BaseModel): - content: Optional[InterleavedContent] = None - error_message: Optional[str] = None - error_code: Optional[int] = None - metadata: Optional[Dict[str, Any]] = None + content: InterleavedContent | None = None + error_message: str | None = None + error_code: int | None = None + metadata: dict[str, Any] | None = None class ToolStore(Protocol): - def get_tool(self, tool_name: str) -> Tool: ... - def get_tool_group(self, toolgroup_id: str) -> ToolGroup: ... + async def get_tool(self, tool_name: str) -> Tool: ... + async def get_tool_group(self, toolgroup_id: str) -> ToolGroup: ... class ListToolGroupsResponse(BaseModel): - data: List[ToolGroup] + data: list[ToolGroup] class ListToolsResponse(BaseModel): - data: List[Tool] + data: list[Tool] class ListToolDefsResponse(BaseModel): @@ -100,40 +92,68 @@ class ToolGroups(Protocol): self, toolgroup_id: str, provider_id: str, - mcp_endpoint: Optional[URL] = None, - args: Optional[Dict[str, Any]] = None, + mcp_endpoint: URL | None = None, + args: dict[str, Any] | None = None, ) -> None: - """Register a tool group""" + """Register a tool group. + + :param toolgroup_id: The ID of the tool group to register. + :param provider_id: The ID of the provider to use for the tool group. + :param mcp_endpoint: The MCP endpoint to use for the tool group. + :param args: A dictionary of arguments to pass to the tool group. + """ ... 
@webmethod(route="/toolgroups/{toolgroup_id:path}", method="GET") async def get_tool_group( self, toolgroup_id: str, - ) -> ToolGroup: ... + ) -> ToolGroup: + """Get a tool group by its ID. + + :param toolgroup_id: The ID of the tool group to get. + :returns: A ToolGroup. + """ + ... @webmethod(route="/toolgroups", method="GET") async def list_tool_groups(self) -> ListToolGroupsResponse: - """List tool groups with optional provider""" + """List tool groups with optional provider. + + :returns: A ListToolGroupsResponse. + """ ... @webmethod(route="/tools", method="GET") - async def list_tools(self, toolgroup_id: Optional[str] = None) -> ListToolsResponse: - """List tools with optional tool group""" + async def list_tools(self, toolgroup_id: str | None = None) -> ListToolsResponse: + """List tools with optional tool group. + + :param toolgroup_id: The ID of the tool group to list tools for. + :returns: A ListToolsResponse. + """ ... @webmethod(route="/tools/{tool_name:path}", method="GET") async def get_tool( self, tool_name: str, - ) -> Tool: ... + ) -> Tool: + """Get a tool by its name. + + :param tool_name: The name of the tool to get. + :returns: A Tool. + """ + ... @webmethod(route="/toolgroups/{toolgroup_id:path}", method="DELETE") async def unregister_toolgroup( self, toolgroup_id: str, ) -> None: - """Unregister a tool group""" + """Unregister a tool group. + + :param toolgroup_id: The ID of the tool group to unregister. + """ ... @@ -151,10 +171,22 @@ class ToolRuntime(Protocol): # TODO: This needs to be renamed once OPEN API generator name conflict issue is fixed. @webmethod(route="/tool-runtime/list-tools", method="GET") async def list_runtime_tools( - self, tool_group_id: Optional[str] = None, mcp_endpoint: Optional[URL] = None - ) -> ListToolDefsResponse: ... + self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None + ) -> ListToolDefsResponse: + """List all tools in the runtime. + + :param tool_group_id: The ID of the tool group to list tools for. + :param mcp_endpoint: The MCP endpoint to use for the tool group. + :returns: A ListToolDefsResponse. + """ + ... @webmethod(route="/tool-runtime/invoke", method="POST") - async def invoke_tool(self, tool_name: str, kwargs: Dict[str, Any]) -> ToolInvocationResult: - """Run a tool with the given arguments""" + async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: + """Run a tool with the given arguments. + + :param tool_name: The name of the tool to invoke. + :param kwargs: A dictionary of arguments to pass to the tool. + :returns: A ToolInvocationResult. + """ ... diff --git a/llama_stack/apis/vector_dbs/vector_dbs.py b/llama_stack/apis/vector_dbs/vector_dbs.py index fe6c33919..405852476 100644 --- a/llama_stack/apis/vector_dbs/vector_dbs.py +++ b/llama_stack/apis/vector_dbs/vector_dbs.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from typing import List, Literal, Optional, Protocol, runtime_checkable +from typing import Literal, Protocol, runtime_checkable from pydantic import BaseModel @@ -15,7 +15,7 @@ from llama_stack.schema_utils import json_schema_type, webmethod @json_schema_type class VectorDB(Resource): - type: Literal[ResourceType.vector_db.value] = ResourceType.vector_db.value + type: Literal[ResourceType.vector_db] = ResourceType.vector_db embedding_model: str embedding_dimension: int @@ -25,7 +25,7 @@ class VectorDB(Resource): return self.identifier @property - def provider_vector_db_id(self) -> str: + def provider_vector_db_id(self) -> str | None: return self.provider_resource_id @@ -33,34 +33,60 @@ class VectorDBInput(BaseModel): vector_db_id: str embedding_model: str embedding_dimension: int - provider_vector_db_id: Optional[str] = None + provider_vector_db_id: str | None = None class ListVectorDBsResponse(BaseModel): - data: List[VectorDB] + data: list[VectorDB] @runtime_checkable @trace_protocol class VectorDBs(Protocol): @webmethod(route="/vector-dbs", method="GET") - async def list_vector_dbs(self) -> ListVectorDBsResponse: ... + async def list_vector_dbs(self) -> ListVectorDBsResponse: + """List all vector databases. + + :returns: A ListVectorDBsResponse. + """ + ... @webmethod(route="/vector-dbs/{vector_db_id:path}", method="GET") async def get_vector_db( self, vector_db_id: str, - ) -> VectorDB: ... + ) -> VectorDB: + """Get a vector database by its identifier. + + :param vector_db_id: The identifier of the vector database to get. + :returns: A VectorDB. + """ + ... @webmethod(route="/vector-dbs", method="POST") async def register_vector_db( self, vector_db_id: str, embedding_model: str, - embedding_dimension: Optional[int] = 384, - provider_id: Optional[str] = None, - provider_vector_db_id: Optional[str] = None, - ) -> VectorDB: ... + embedding_dimension: int | None = 384, + provider_id: str | None = None, + provider_vector_db_id: str | None = None, + ) -> VectorDB: + """Register a vector database. + + :param vector_db_id: The identifier of the vector database to register. + :param embedding_model: The embedding model to use. + :param embedding_dimension: The dimension of the embedding model. + :param provider_id: The identifier of the provider. + :param provider_vector_db_id: The identifier of the vector database in the provider. + :returns: A VectorDB. + """ + ... @webmethod(route="/vector-dbs/{vector_db_id:path}", method="DELETE") - async def unregister_vector_db(self, vector_db_id: str) -> None: ... + async def unregister_vector_db(self, vector_db_id: str) -> None: + """Unregister a vector database. + + :param vector_db_id: The identifier of the vector database to unregister. + """ + ... diff --git a/llama_stack/apis/vector_io/vector_io.py b/llama_stack/apis/vector_io/vector_io.py index ab0a4a20a..44cc8f904 100644 --- a/llama_stack/apis/vector_io/vector_io.py +++ b/llama_stack/apis/vector_io/vector_io.py @@ -8,7 +8,7 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict, List, Optional, Protocol, runtime_checkable +from typing import Any, Protocol, runtime_checkable from pydantic import BaseModel, Field @@ -19,18 +19,26 @@ from llama_stack.schema_utils import json_schema_type, webmethod class Chunk(BaseModel): + """ + A chunk of content that can be inserted into a vector database. + :param content: The content of the chunk, which can be interleaved text, images, or other types. 
+ :param embedding: Optional embedding for the chunk. If not provided, it will be computed later. + :param metadata: Metadata associated with the chunk, such as document ID, source, or other relevant information. + """ + content: InterleavedContent - metadata: Dict[str, Any] = Field(default_factory=dict) + metadata: dict[str, Any] = Field(default_factory=dict) + embedding: list[float] | None = None @json_schema_type class QueryChunksResponse(BaseModel): - chunks: List[Chunk] - scores: List[float] + chunks: list[Chunk] + scores: list[float] class VectorDBStore(Protocol): - def get_vector_db(self, vector_db_id: str) -> Optional[VectorDB]: ... + def get_vector_db(self, vector_db_id: str) -> VectorDB | None: ... @runtime_checkable @@ -44,14 +52,32 @@ class VectorIO(Protocol): async def insert_chunks( self, vector_db_id: str, - chunks: List[Chunk], - ttl_seconds: Optional[int] = None, - ) -> None: ... + chunks: list[Chunk], + ttl_seconds: int | None = None, + ) -> None: + """Insert chunks into a vector database. + + :param vector_db_id: The identifier of the vector database to insert the chunks into. + :param chunks: The chunks to insert. Each `Chunk` should contain content which can be interleaved text, images, or other types. + `metadata`: `dict[str, Any]` and `embedding`: `List[float]` are optional. + If `metadata` is provided, you configure how Llama Stack formats the chunk during generation. + If `embedding` is not provided, it will be computed later. + :param ttl_seconds: The time to live of the chunks. + """ + ... @webmethod(route="/vector-io/query", method="POST") async def query_chunks( self, vector_db_id: str, query: InterleavedContent, - params: Optional[Dict[str, Any]] = None, - ) -> QueryChunksResponse: ... + params: dict[str, Any] | None = None, + ) -> QueryChunksResponse: + """Query chunks from a vector database. + + :param vector_db_id: The identifier of the vector database to query. + :param query: The query to search for. + :param params: The parameters of the query. + :returns: A QueryChunksResponse. + """ + ... 
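Note: the new Chunk docstring and the expanded insert_chunks/query_chunks docstrings above can be exercised end to end with a short sketch. This is illustrative only and not part of this diff: the import path assumes the package re-exports the models from vector_io.py, `vector_io` stands for any VectorIO implementation, and the "my-docs" ID and sample vector are made up:

from llama_stack.apis.vector_io import Chunk, VectorIO

async def index_and_search(vector_io: VectorIO) -> None:
    # Embeddings are optional on Chunk; when omitted they are computed later,
    # as the docstring above notes.
    chunks = [
        Chunk(
            content="Llama Stack unifies inference, safety, and memory APIs.",
            metadata={"document_id": "doc-1"},
        ),
        Chunk(
            content="Precomputed vectors are also accepted.",
            metadata={"document_id": "doc-2"},
            embedding=[0.12, 0.08, 0.33],  # must match the vector DB's embedding_dimension
        ),
    ]
    await vector_io.insert_chunks(vector_db_id="my-docs", chunks=chunks, ttl_seconds=3600)

    response = await vector_io.query_chunks(vector_db_id="my-docs", query="What does Llama Stack unify?")
    for chunk, score in zip(response.chunks, response.scores):
        print(score, chunk.metadata.get("document_id"))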
diff --git a/llama_stack/cli/download.py b/llama_stack/cli/download.py index 9694bf22d..b96842119 100644 --- a/llama_stack/cli/download.py +++ b/llama_stack/cli/download.py @@ -9,11 +9,11 @@ import asyncio import json import os import shutil +import sys from dataclasses import dataclass from datetime import datetime, timezone from functools import partial from pathlib import Path -from typing import Dict, List, Optional import httpx from pydantic import BaseModel, ConfigDict @@ -102,7 +102,7 @@ class DownloadTask: output_file: str total_size: int = 0 downloaded_size: int = 0 - task_id: Optional[int] = None + task_id: int | None = None retries: int = 0 max_retries: int = 3 @@ -262,7 +262,7 @@ class ParallelDownloader: self.progress.update(task.task_id, description=f"[red]Failed: {task.output_file}[/red]") raise DownloadError(f"Download failed for {task.output_file}: {str(e)}") from e - def has_disk_space(self, tasks: List[DownloadTask]) -> bool: + def has_disk_space(self, tasks: list[DownloadTask]) -> bool: try: total_remaining_size = sum(task.total_size - task.downloaded_size for task in tasks) dir_path = os.path.dirname(os.path.abspath(tasks[0].output_file)) @@ -282,7 +282,7 @@ class ParallelDownloader: except Exception as e: raise DownloadError(f"Failed to check disk space: {str(e)}") from e - async def download_all(self, tasks: List[DownloadTask]) -> None: + async def download_all(self, tasks: list[DownloadTask]) -> None: if not tasks: raise ValueError("No download tasks provided") @@ -378,33 +378,34 @@ def _meta_download( downloader = ParallelDownloader(max_concurrent_downloads=max_concurrent_downloads) asyncio.run(downloader.download_all(tasks)) - cprint(f"\nSuccessfully downloaded model to {output_dir}", "green") + cprint(f"\nSuccessfully downloaded model to {output_dir}", color="green", file=sys.stderr) cprint( f"\nView MD5 checksum files at: {output_dir / 'checklist.chk'}", - "white", + file=sys.stderr, ) cprint( f"\n[Optionally] To run MD5 checksums, use the following command: llama model verify-download --model-id {model_id}", - "yellow", + color="yellow", + file=sys.stderr, ) class ModelEntry(BaseModel): model_id: str - files: Dict[str, str] + files: dict[str, str] model_config = ConfigDict(protected_namespaces=()) class Manifest(BaseModel): - models: List[ModelEntry] + models: list[ModelEntry] expires_on: datetime def _download_from_manifest(manifest_file: str, max_concurrent_downloads: int): from llama_stack.distribution.utils.model_utils import model_local_dir - with open(manifest_file, "r") as f: + with open(manifest_file) as f: d = json.load(f) manifest = Manifest(**d) @@ -460,15 +461,17 @@ def run_download_cmd(args: argparse.Namespace, parser: argparse.ArgumentParser): from llama_stack.models.llama.sku_list import llama_meta_net_info, resolve_model from .model.safety_models import ( - prompt_guard_download_info, - prompt_guard_model_sku, + prompt_guard_download_info_map, + prompt_guard_model_sku_map, ) - prompt_guard = prompt_guard_model_sku() + prompt_guard_model_sku_map = prompt_guard_model_sku_map() + prompt_guard_download_info_map = prompt_guard_download_info_map() + for model_id in model_ids: - if model_id == prompt_guard.model_id: - model = prompt_guard - info = prompt_guard_download_info() + if model_id in prompt_guard_model_sku_map.keys(): + model = prompt_guard_model_sku_map[model_id] + info = prompt_guard_download_info_map[model_id] else: model = resolve_model(model_id) if model is None: diff --git a/llama_stack/cli/llama.py b/llama_stack/cli/llama.py index 
8ff580029..433b311e7 100644 --- a/llama_stack/cli/llama.py +++ b/llama_stack/cli/llama.py @@ -38,7 +38,10 @@ class LlamaCLIParser: print_subcommand_description(self.parser, subparsers) def parse_args(self) -> argparse.Namespace: - return self.parser.parse_args() + args = self.parser.parse_args() + if not isinstance(args, argparse.Namespace): + raise TypeError(f"Expected argparse.Namespace, got {type(args)}") + return args def run(self, args: argparse.Namespace) -> None: args.func(args) diff --git a/llama_stack/cli/model/describe.py b/llama_stack/cli/model/describe.py index 62dde36e8..26b0da686 100644 --- a/llama_stack/cli/model/describe.py +++ b/llama_stack/cli/model/describe.py @@ -36,11 +36,11 @@ class ModelDescribe(Subcommand): ) def _run_model_describe_cmd(self, args: argparse.Namespace) -> None: - from .safety_models import prompt_guard_model_sku + from .safety_models import prompt_guard_model_sku_map - prompt_guard = prompt_guard_model_sku() - if args.model_id == prompt_guard.model_id: - model = prompt_guard + prompt_guard_model_map = prompt_guard_model_sku_map() + if args.model_id in prompt_guard_model_map.keys(): + model = prompt_guard_model_map[args.model_id] else: model = resolve_model(args.model_id) diff --git a/llama_stack/cli/model/list.py b/llama_stack/cli/model/list.py index b9499f06d..cf84dd526 100644 --- a/llama_stack/cli/model/list.py +++ b/llama_stack/cli/model/list.py @@ -84,7 +84,7 @@ class ModelList(Subcommand): ) def _run_model_list_cmd(self, args: argparse.Namespace) -> None: - from .safety_models import prompt_guard_model_sku + from .safety_models import prompt_guard_model_skus if args.downloaded: return _run_model_list_downloaded_cmd() @@ -96,7 +96,7 @@ class ModelList(Subcommand): ] rows = [] - for model in all_registered_models() + [prompt_guard_model_sku()]: + for model in all_registered_models() + prompt_guard_model_skus(): if not args.show_all and not model.is_featured: continue diff --git a/llama_stack/cli/model/remove.py b/llama_stack/cli/model/remove.py index ee8d6299d..98710d82b 100644 --- a/llama_stack/cli/model/remove.py +++ b/llama_stack/cli/model/remove.py @@ -42,11 +42,12 @@ class ModelRemove(Subcommand): ) def _run_model_remove_cmd(self, args: argparse.Namespace) -> None: - from .safety_models import prompt_guard_model_sku + from .safety_models import prompt_guard_model_sku_map - prompt_guard = prompt_guard_model_sku() - if args.model == prompt_guard.model_id: - model = prompt_guard + prompt_guard_model_map = prompt_guard_model_sku_map() + + if args.model in prompt_guard_model_map.keys(): + model = prompt_guard_model_map[args.model] else: model = resolve_model(args.model) diff --git a/llama_stack/cli/model/safety_models.py b/llama_stack/cli/model/safety_models.py index 131d055aa..e31767f13 100644 --- a/llama_stack/cli/model/safety_models.py +++ b/llama_stack/cli/model/safety_models.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict +from typing import Any from pydantic import BaseModel, ConfigDict, Field @@ -15,14 +15,14 @@ from llama_stack.models.llama.sku_types import CheckpointQuantizationFormat class PromptGuardModel(BaseModel): """Make a 'fake' Model-like object for Prompt Guard. Eventually this will be removed.""" - model_id: str = "Prompt-Guard-86M" + model_id: str + huggingface_repo: str description: str = "Prompt Guard. NOTE: this model will not be provided via `llama` CLI soon." 
is_featured: bool = False - huggingface_repo: str = "meta-llama/Prompt-Guard-86M" - max_seq_length: int = 2048 + max_seq_length: int = 512 is_instruct_model: bool = False quantization_format: CheckpointQuantizationFormat = CheckpointQuantizationFormat.bf16 - arch_args: Dict[str, Any] = Field(default_factory=dict) + arch_args: dict[str, Any] = Field(default_factory=dict) def descriptor(self) -> str: return self.model_id @@ -30,18 +30,35 @@ class PromptGuardModel(BaseModel): model_config = ConfigDict(protected_namespaces=()) -def prompt_guard_model_sku(): - return PromptGuardModel() +def prompt_guard_model_skus(): + return [ + PromptGuardModel(model_id="Prompt-Guard-86M", huggingface_repo="meta-llama/Prompt-Guard-86M"), + PromptGuardModel( + model_id="Llama-Prompt-Guard-2-86M", + huggingface_repo="meta-llama/Llama-Prompt-Guard-2-86M", + ), + PromptGuardModel( + model_id="Llama-Prompt-Guard-2-22M", + huggingface_repo="meta-llama/Llama-Prompt-Guard-2-22M", + ), + ] -def prompt_guard_download_info(): - return LlamaDownloadInfo( - folder="Prompt-Guard", - files=[ - "model.safetensors", - "special_tokens_map.json", - "tokenizer.json", - "tokenizer_config.json", - ], - pth_size=1, - ) +def prompt_guard_model_sku_map() -> dict[str, Any]: + return {model.model_id: model for model in prompt_guard_model_skus()} + + +def prompt_guard_download_info_map() -> dict[str, LlamaDownloadInfo]: + return { + model.model_id: LlamaDownloadInfo( + folder="Prompt-Guard" if model.model_id == "Prompt-Guard-86M" else model.model_id, + files=[ + "model.safetensors", + "special_tokens_map.json", + "tokenizer.json", + "tokenizer_config.json", + ], + pth_size=1, + ) + for model in prompt_guard_model_skus() + } diff --git a/llama_stack/cli/stack/_build.py b/llama_stack/cli/stack/_build.py index ac1933e0e..f6f72946a 100644 --- a/llama_stack/cli/stack/_build.py +++ b/llama_stack/cli/stack/_build.py @@ -12,14 +12,14 @@ import shutil import sys import textwrap from functools import lru_cache +from importlib.abc import Traversable from pathlib import Path -from typing import Dict, Optional import yaml from prompt_toolkit import prompt from prompt_toolkit.completion import WordCompleter from prompt_toolkit.validation import Validator -from termcolor import cprint +from termcolor import colored, cprint from llama_stack.cli.stack.utils import ImageType from llama_stack.cli.table import print_table @@ -37,7 +37,8 @@ from llama_stack.distribution.datatypes import ( ) from llama_stack.distribution.distribution import get_provider_registry from llama_stack.distribution.resolver import InvalidProviderError -from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR +from llama_stack.distribution.stack import replace_env_vars +from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR from llama_stack.distribution.utils.dynamic import instantiate_class_type from llama_stack.distribution.utils.exec import formulate_run_args, run_command from llama_stack.distribution.utils.image_types import LlamaStackImageType @@ -46,14 +47,14 @@ from llama_stack.providers.datatypes import Api TEMPLATES_PATH = Path(__file__).parent.parent.parent / "templates" -@lru_cache() -def available_templates_specs() -> Dict[str, BuildConfig]: +@lru_cache +def available_templates_specs() -> dict[str, BuildConfig]: import yaml template_specs = {} for p in TEMPLATES_PATH.rglob("*build.yaml"): template_name = p.parent.name - with open(p, "r") as f: + with open(p) as f: build_config = BuildConfig(**yaml.safe_load(f)) 
template_specs[template_name] = build_config return template_specs @@ -78,6 +79,7 @@ def run_stack_build_command(args: argparse.Namespace) -> None: cprint( f"Could not find template {args.template}. Please run `llama stack build --list-templates` to check out the available templates", color="red", + file=sys.stderr, ) sys.exit(1) build_config = available_templates[args.template] @@ -87,8 +89,50 @@ def run_stack_build_command(args: argparse.Namespace) -> None: cprint( f"Please specify a image-type ({' | '.join(e.value for e in ImageType)}) for {args.template}", color="red", + file=sys.stderr, ) sys.exit(1) + elif args.providers: + providers = dict() + for api_provider in args.providers.split(","): + if "=" not in api_provider: + cprint( + "Could not parse `--providers`. Please ensure the list is in the format api1=provider1,api2=provider2", + color="red", + file=sys.stderr, + ) + sys.exit(1) + api, provider = api_provider.split("=") + providers_for_api = get_provider_registry().get(Api(api), None) + if providers_for_api is None: + cprint( + f"{api} is not a valid API.", + color="red", + file=sys.stderr, + ) + sys.exit(1) + if provider in providers_for_api: + providers.setdefault(api, []).append(provider) + else: + cprint( + f"{provider} is not a valid provider for the {api} API.", + color="red", + file=sys.stderr, + ) + sys.exit(1) + distribution_spec = DistributionSpec( + providers=providers, + description=",".join(args.providers), + ) + if not args.image_type: + cprint( + f"Please specify a image-type (container | conda | venv) for {args.template}", + color="red", + file=sys.stderr, + ) + sys.exit(1) + + build_config = BuildConfig(image_type=args.image_type, distribution_spec=distribution_spec) elif not args.config and not args.template: name = prompt( "> Enter a name for your Llama Stack (e.g. my-local-stack): ", @@ -99,12 +143,13 @@ def run_stack_build_command(args: argparse.Namespace) -> None: ) image_type = prompt( - f"> Enter the image type you want your Llama Stack to be built as ({' or '.join(e.value for e in ImageType)}): ", + "> Enter the image type you want your Llama Stack to be built as (use to see options): ", + completer=WordCompleter([e.value for e in ImageType]), + complete_while_typing=True, validator=Validator.from_callable( lambda x: x in [e.value for e in ImageType], - error_message=f"Invalid image type, please enter {' or '.join(e.value for e in ImageType)}", + error_message="Invalid image type. 
Use to see options", ), - default=ImageType.CONDA.value, ) if image_type == ImageType.CONDA.value: @@ -112,12 +157,14 @@ def run_stack_build_command(args: argparse.Namespace) -> None: cprint( f"No current conda environment detected or specified, will create a new conda environment with the name `llamastack-{name}`", color="yellow", + file=sys.stderr, ) image_name = f"llamastack-{name}" else: cprint( f"Using conda environment {image_name}", color="green", + file=sys.stderr, ) else: image_name = f"llamastack-{name}" @@ -130,9 +177,10 @@ def run_stack_build_command(args: argparse.Namespace) -> None: """, ), color="green", + file=sys.stderr, ) - print("Tip: use to see options for the providers.\n") + cprint("Tip: use to see options for the providers.\n", color="green", file=sys.stderr) providers = dict() for api, providers_for_api in get_provider_registry().items(): @@ -140,7 +188,7 @@ def run_stack_build_command(args: argparse.Namespace) -> None: if not available_providers: continue api_provider = prompt( - "> Enter provider for API {}: ".format(api.value), + f"> Enter provider for API {api.value}: ", completer=WordCompleter(available_providers), complete_while_typing=True, validator=Validator.from_callable( @@ -163,26 +211,24 @@ def run_stack_build_command(args: argparse.Namespace) -> None: build_config = BuildConfig(image_type=image_type, distribution_spec=distribution_spec) else: - with open(args.config, "r") as f: + with open(args.config) as f: try: - build_config = BuildConfig(**yaml.safe_load(f)) + contents = yaml.safe_load(f) + contents = replace_env_vars(contents) + build_config = BuildConfig(**contents) + if args.image_type: + build_config.image_type = args.image_type except Exception as e: cprint( f"Could not parse config file {args.config}: {e}", color="red", + file=sys.stderr, ) sys.exit(1) - if build_config.image_type == LlamaStackImageType.CONTAINER.value and not args.image_name: - cprint( - "Please specify --image-name when building a container from a config file", - color="red", - ) - sys.exit(1) - if args.print_deps_only: print(f"# Dependencies for {args.template or args.config or image_name}") - normal_deps, special_deps = get_provider_dependencies(build_config.distribution_spec.providers) + normal_deps, special_deps = get_provider_dependencies(build_config) normal_deps += SERVER_DEPENDENCIES print(f"uv pip install {' '.join(normal_deps)}") for special_dep in special_deps: @@ -198,24 +244,32 @@ def run_stack_build_command(args: argparse.Namespace) -> None: ) except (Exception, RuntimeError) as exc: + import traceback + cprint( f"Error building stack: {exc}", color="red", + file=sys.stderr, ) + cprint("Stack trace:", color="red", file=sys.stderr) + traceback.print_exc() sys.exit(1) + if run_config is None: cprint( "Run config path is empty", color="red", + file=sys.stderr, ) sys.exit(1) if args.run: - run_config = Path(run_config) config_dict = yaml.safe_load(run_config.read_text()) config = parse_and_maybe_upgrade_config(config_dict) + if config.external_providers_dir and not config.external_providers_dir.exists(): + config.external_providers_dir.mkdir(exist_ok=True) run_args = formulate_run_args(args.image_type, args.image_name, config, args.template) - run_args.extend([run_config, str(os.getenv("LLAMA_STACK_PORT", 8321))]) + run_args.extend([str(os.getenv("LLAMA_STACK_PORT", 8321)), "--config", run_config]) run_command(run_args) @@ -223,7 +277,7 @@ def _generate_run_config( build_config: BuildConfig, build_dir: Path, image_name: str, -) -> str: +) -> Path: """ Generate a 
run.yaml template file for user to edit from a build.yaml file """ @@ -233,9 +287,12 @@ def _generate_run_config( image_name=image_name, apis=apis, providers={}, + external_providers_dir=build_config.external_providers_dir + if build_config.external_providers_dir + else EXTERNAL_PROVIDERS_DIR, ) # build providers dict - provider_registry = get_provider_registry() + provider_registry = get_provider_registry(build_config) for api in apis: run_config.providers[api] = [] provider_types = build_config.distribution_spec.providers[api] @@ -249,8 +306,23 @@ def _generate_run_config( if p.deprecation_error: raise InvalidProviderError(p.deprecation_error) - config_type = instantiate_class_type(provider_registry[Api(api)][provider_type].config_class) - if hasattr(config_type, "sample_run_config"): + try: + config_type = instantiate_class_type(provider_registry[Api(api)][provider_type].config_class) + except ModuleNotFoundError: + # HACK ALERT: + # This code executes after building is done, the import cannot work since the + # package is either available in the venv or container - not available on the host. + # TODO: use a "is_external" flag in ProviderSpec to check if the provider is + # external + cprint( + f"Failed to import provider {provider_type} for API {api} - assuming it's external, skipping", + color="yellow", + file=sys.stderr, + ) + # Set config_type to None to avoid UnboundLocalError + config_type = None + + if config_type is not None and hasattr(config_type, "sample_run_config"): config = config_type.sample_run_config(__distro_dir__=f"~/.llama/distributions/{image_name}") else: config = {} @@ -268,20 +340,22 @@ def _generate_run_config( to_write = json.loads(run_config.model_dump_json()) f.write(yaml.dump(to_write, sort_keys=False)) - # this path is only invoked when no template is provided - cprint( - f"You can now run your stack with `llama stack run {run_config_file}`", - color="green", - ) + # Only print this message for non-container builds since it will be displayed before the + # container is built + # For non-container builds, the run.yaml is generated at the very end of the build process so it + # makes sense to display this message + if build_config.image_type != LlamaStackImageType.CONTAINER.value: + cprint(f"You can now run your stack with `llama stack run {run_config_file}`", color="green", file=sys.stderr) return run_config_file def _run_stack_build_command_from_build_config( build_config: BuildConfig, - image_name: Optional[str] = None, - template_name: Optional[str] = None, - config_path: Optional[str] = None, -) -> str: + image_name: str | None = None, + template_name: str | None = None, + config_path: str | None = None, +) -> Path | Traversable: + image_name = image_name or build_config.image_name if build_config.image_type == LlamaStackImageType.CONTAINER.value: if template_name: image_name = f"distribution-{template_name}" @@ -305,6 +379,13 @@ def _run_stack_build_command_from_build_config( build_file_path = build_dir / f"{image_name}-build.yaml" os.makedirs(build_dir, exist_ok=True) + run_config_file = None + # Generate the run.yaml so it can be included in the container image with the proper entrypoint + # Only do this if we're building a container image and we're not using a template + if build_config.image_type == LlamaStackImageType.CONTAINER.value and not template_name and config_path: + cprint("Generating run.yaml file", color="yellow", file=sys.stderr) + run_config_file = _generate_run_config(build_config, build_dir, image_name) + with open(build_file_path, 
"w") as f: to_write = json.loads(build_config.model_dump_json()) f.write(yaml.dump(to_write, sort_keys=False)) @@ -313,7 +394,8 @@ def _run_stack_build_command_from_build_config( build_config, build_file_path, image_name, - template_or_config=template_name or config_path, + template_or_config=template_name or config_path or str(build_file_path), + run_config=run_config_file, ) if return_code != 0: raise RuntimeError(f"Failed to build image {image_name}") @@ -325,7 +407,14 @@ def _run_stack_build_command_from_build_config( run_config_file = build_dir / f"{template_name}-run.yaml" shutil.copy(path, run_config_file) - cprint("Build Successful!", color="green") + cprint("Build Successful!", color="green", file=sys.stderr) + cprint(f"You can find the newly-built template here: {template_path}", color="light_blue", file=sys.stderr) + cprint( + "You can run the new Llama Stack distro via: " + + colored(f"llama stack run {template_path} --image-type {build_config.image_type}", "light_blue"), + color="green", + file=sys.stderr, + ) return template_path else: return _generate_run_config(build_config, build_dir, image_name) diff --git a/llama_stack/cli/stack/build.py b/llama_stack/cli/stack/build.py index c511a0682..2c402beeb 100644 --- a/llama_stack/cli/stack/build.py +++ b/llama_stack/cli/stack/build.py @@ -49,7 +49,7 @@ class StackBuild(Subcommand): type=str, help="Image Type to use for the build. If not specified, will use the image type from the template config.", choices=[e.value for e in ImageType], - default=ImageType.CONDA.value, + default=None, # no default so we can detect if a user specified --image-type and override image_type in the config ) self.parser.add_argument( @@ -75,6 +75,12 @@ the build. If not specified, currently active environment will be used if found. default=False, help="Run the stack after building using the same image type, name, and other applicable arguments", ) + self.parser.add_argument( + "--providers", + type=str, + default=None, + help="Build a config for a list of providers and only those providers. This list is formatted like: api1=provider1,api2=provider2. 
Where there can be multiple providers per API.", + ) def _run_stack_build_command(self, args: argparse.Namespace) -> None: # always keep implementation completely silo-ed away from CLI so CLI diff --git a/llama_stack/cli/stack/list_providers.py b/llama_stack/cli/stack/list_providers.py index bfe11aa2c..deebd937b 100644 --- a/llama_stack/cli/stack/list_providers.py +++ b/llama_stack/cli/stack/list_providers.py @@ -46,7 +46,7 @@ class StackListProviders(Subcommand): else: providers = [(k.value, prov) for k, prov in all_providers.items()] - providers = [p for api, p in providers if api in self.providable_apis] + providers = [(api, p) for api, p in providers if api in self.providable_apis] # eventually, this should query a registry at llama.meta.com/llamastack/distributions headers = [ @@ -57,7 +57,7 @@ class StackListProviders(Subcommand): rows = [] - specs = [spec for p in providers for spec in p.values()] + specs = [spec for api, p in providers for spec in p.values()] for spec in specs: if spec.is_sample: continue @@ -65,7 +65,7 @@ class StackListProviders(Subcommand): [ spec.api.value, spec.provider_type, - ",".join(spec.pip_packages), + ",".join(spec.pip_packages) if hasattr(spec, "pip_packages") else "", ] ) print_table( diff --git a/llama_stack/cli/stack/list_stacks.py b/llama_stack/cli/stack/list_stacks.py new file mode 100644 index 000000000..2ea0fdeea --- /dev/null +++ b/llama_stack/cli/stack/list_stacks.py @@ -0,0 +1,56 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import argparse +from pathlib import Path + +from llama_stack.cli.subcommand import Subcommand +from llama_stack.cli.table import print_table + + +class StackListBuilds(Subcommand): + """List built stacks in .llama/distributions directory""" + + def __init__(self, subparsers: argparse._SubParsersAction): + super().__init__() + self.parser = subparsers.add_parser( + "list", + prog="llama stack list", + description="list the build stacks", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + self._add_arguments() + self.parser.set_defaults(func=self._list_stack_command) + + def _get_distribution_dirs(self) -> dict[str, Path]: + """Return a dictionary of distribution names and their paths""" + distributions = {} + dist_dir = Path.home() / ".llama" / "distributions" + + if dist_dir.exists(): + for stack_dir in dist_dir.iterdir(): + if stack_dir.is_dir(): + distributions[stack_dir.name] = stack_dir + return distributions + + def _list_stack_command(self, args: argparse.Namespace) -> None: + distributions = self._get_distribution_dirs() + + if not distributions: + print("No stacks found in ~/.llama/distributions") + return + + headers = ["Stack Name", "Path"] + headers.extend(["Build Config", "Run Config"]) + rows = [] + for name, path in distributions.items(): + row = [name, str(path)] + # Check for build and run config files + build_config = "Yes" if (path / f"{name}-build.yaml").exists() else "No" + run_config = "Yes" if (path / f"{name}-run.yaml").exists() else "No" + row.extend([build_config, run_config]) + rows.append(row) + print_table(rows, headers, separate_rows=True) diff --git a/llama_stack/cli/stack/remove.py b/llama_stack/cli/stack/remove.py new file mode 100644 index 000000000..a1796941e --- /dev/null +++ b/llama_stack/cli/stack/remove.py @@ -0,0 +1,115 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. 
+# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import argparse +import shutil +import sys +from pathlib import Path + +from termcolor import cprint + +from llama_stack.cli.subcommand import Subcommand +from llama_stack.cli.table import print_table + + +class StackRemove(Subcommand): + """Remove the build stack""" + + def __init__(self, subparsers: argparse._SubParsersAction): + super().__init__() + self.parser = subparsers.add_parser( + "rm", + prog="llama stack rm", + description="Remove the build stack", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + self._add_arguments() + self.parser.set_defaults(func=self._remove_stack_build_command) + + def _add_arguments(self) -> None: + self.parser.add_argument( + "name", + type=str, + nargs="?", + help="Name of the stack to delete", + ) + self.parser.add_argument( + "--all", + "-a", + action="store_true", + help="Delete all stacks (use with caution)", + ) + + def _get_distribution_dirs(self) -> dict[str, Path]: + """Return a dictionary of distribution names and their paths""" + distributions = {} + dist_dir = Path.home() / ".llama" / "distributions" + + if dist_dir.exists(): + for stack_dir in dist_dir.iterdir(): + if stack_dir.is_dir(): + distributions[stack_dir.name] = stack_dir + return distributions + + def _list_stacks(self) -> None: + """Display available stacks in a table""" + distributions = self._get_distribution_dirs() + if not distributions: + cprint("No stacks found in ~/.llama/distributions", color="red", file=sys.stderr) + sys.exit(1) + + headers = ["Stack Name", "Path"] + rows = [[name, str(path)] for name, path in distributions.items()] + print_table(rows, headers, separate_rows=True) + + def _remove_stack_build_command(self, args: argparse.Namespace) -> None: + distributions = self._get_distribution_dirs() + + if args.all: + confirm = input("Are you sure you want to delete ALL stacks? [yes-i-really-want/N] ").lower() + if confirm != "yes-i-really-want": + cprint("Deletion cancelled.", color="green", file=sys.stderr) + return + + for name, path in distributions.items(): + try: + shutil.rmtree(path) + cprint(f"Deleted stack: {name}", color="green", file=sys.stderr) + except Exception as e: + cprint( + f"Failed to delete stack {name}: {e}", + color="red", + file=sys.stderr, + ) + sys.exit(1) + + if not args.name: + self._list_stacks() + if not args.name: + return + + if args.name not in distributions: + self._list_stacks() + cprint( + f"Stack not found: {args.name}", + color="red", + file=sys.stderr, + ) + sys.exit(1) + + stack_path = distributions[args.name] + + confirm = input(f"Are you sure you want to delete stack '{args.name}'? 
[y/N] ").lower() + if confirm != "y": + cprint("Deletion cancelled.", color="green", file=sys.stderr) + return + + try: + shutil.rmtree(stack_path) + cprint(f"Successfully deleted stack: {args.name}", color="green", file=sys.stderr) + except Exception as e: + cprint(f"Failed to delete stack {args.name}: {e}", color="red", file=sys.stderr) + sys.exit(1) diff --git a/llama_stack/cli/stack/run.py b/llama_stack/cli/stack/run.py index d8234bb46..27745edac 100644 --- a/llama_stack/cli/stack/run.py +++ b/llama_stack/cli/stack/run.py @@ -6,6 +6,7 @@ import argparse import os +import subprocess from pathlib import Path from llama_stack.cli.stack.utils import ImageType @@ -33,7 +34,8 @@ class StackRun(Subcommand): self.parser.add_argument( "config", type=str, - help="Path to config file to use for the run", + nargs="?", # Make it optional + help="Path to config file to use for the run. Required for venv and conda environments.", ) self.parser.add_argument( "--port", @@ -47,34 +49,23 @@ class StackRun(Subcommand): default=os.environ.get("CONDA_DEFAULT_ENV"), help="Name of the image to run. Defaults to the current environment", ) - self.parser.add_argument( - "--disable-ipv6", - action="store_true", - help="Disable IPv6 support", - default=False, - ) self.parser.add_argument( "--env", action="append", help="Environment variables to pass to the server in KEY=VALUE format. Can be specified multiple times.", metavar="KEY=VALUE", ) - self.parser.add_argument( - "--tls-keyfile", - type=str, - help="Path to TLS key file for HTTPS", - ) - self.parser.add_argument( - "--tls-certfile", - type=str, - help="Path to TLS certificate file for HTTPS", - ) self.parser.add_argument( "--image-type", type=str, help="Image Type used during the build. This can be either conda or container or venv.", choices=[e.value for e in ImageType], ) + self.parser.add_argument( + "--enable-ui", + action="store_true", + help="Start the UI server", + ) # If neither image type nor image name is provided, but at the same time # the current environment has conda breadcrumbs, then assume what the user @@ -98,44 +89,57 @@ class StackRun(Subcommand): from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR from llama_stack.distribution.utils.exec import formulate_run_args, run_command - config_file = Path(args.config) - has_yaml_suffix = args.config.endswith(".yaml") - template_name = None - - if not config_file.exists() and not has_yaml_suffix: - # check if this is a template - config_file = Path(REPO_ROOT) / "llama_stack" / "templates" / args.config / "run.yaml" - if config_file.exists(): - template_name = args.config - - if not config_file.exists() and not has_yaml_suffix: - # check if it's a build config saved to ~/.llama dir - config_file = Path(DISTRIBS_BASE_DIR / f"llamastack-{args.config}" / f"{args.config}-run.yaml") - - if not config_file.exists(): - self.parser.error( - f"File {str(config_file)} does not exist.\n\nPlease run `llama stack build` to generate (and optionally edit) a run.yaml file" - ) - - if not config_file.is_file(): - self.parser.error( - f"Config file must be a valid file path, '{config_file}’ is not a file: type={type(config_file)}" - ) - - logger.info(f"Using run configuration: {config_file}") - - try: - config_dict = yaml.safe_load(config_file.read_text()) - except yaml.parser.ParserError as e: - self.parser.error(f"failed to load config file '{config_file}':\n {e}") - - try: - config = parse_and_maybe_upgrade_config(config_dict) - except AttributeError as e: - self.parser.error(f"failed to 
parse config file '{config_file}':\n {e}") - + if args.enable_ui: + self._start_ui_development_server(args.port) image_type, image_name = self._get_image_type_and_name(args) + # Check if config is required based on image type + if (image_type in [ImageType.CONDA.value, ImageType.VENV.value]) and not args.config: + self.parser.error("Config file is required for venv and conda environments") + + if args.config: + config_file = Path(args.config) + has_yaml_suffix = args.config.endswith(".yaml") + template_name = None + + if not config_file.exists() and not has_yaml_suffix: + # check if this is a template + config_file = Path(REPO_ROOT) / "llama_stack" / "templates" / args.config / "run.yaml" + if config_file.exists(): + template_name = args.config + + if not config_file.exists() and not has_yaml_suffix: + # check if it's a build config saved to ~/.llama dir + config_file = Path(DISTRIBS_BASE_DIR / f"llamastack-{args.config}" / f"{args.config}-run.yaml") + + if not config_file.exists(): + self.parser.error( + f"File {str(config_file)} does not exist.\n\nPlease run `llama stack build` to generate (and optionally edit) a run.yaml file" + ) + + if not config_file.is_file(): + self.parser.error( + f"Config file must be a valid file path, '{config_file}' is not a file: type={type(config_file)}" + ) + + logger.info(f"Using run configuration: {config_file}") + + try: + config_dict = yaml.safe_load(config_file.read_text()) + except yaml.parser.ParserError as e: + self.parser.error(f"failed to load config file '{config_file}':\n {e}") + + try: + config = parse_and_maybe_upgrade_config(config_dict) + if not os.path.exists(str(config.external_providers_dir)): + os.makedirs(str(config.external_providers_dir), exist_ok=True) + except AttributeError as e: + self.parser.error(f"failed to parse config file '{config_file}':\n {e}") + else: + config = None + config_file = None + template_name = None + # If neither image type nor image name is provided, assume the server should be run directly # using the current environment packages. if not image_type and not image_name: @@ -157,9 +161,10 @@ class StackRun(Subcommand): else: run_args = formulate_run_args(image_type, image_name, config, template_name) - run_args.extend([str(config_file), str(args.port)]) - if args.disable_ipv6: - run_args.append("--disable-ipv6") + run_args.extend([str(args.port)]) + + if config_file: + run_args.extend(["--config", str(config_file)]) if args.env: for env_var in args.env: @@ -172,6 +177,45 @@ class StackRun(Subcommand): return run_args.extend(["--env", f"{key}={value}"]) - if args.tls_keyfile and args.tls_certfile: - run_args.extend(["--tls-keyfile", args.tls_keyfile, "--tls-certfile", args.tls_certfile]) run_command(run_args) + + def _start_ui_development_server(self, stack_server_port: int): + logger.info("Attempting to start UI development server...") + # Check if npm is available + npm_check = subprocess.run(["npm", "--version"], capture_output=True, text=True, check=False) + if npm_check.returncode != 0: + logger.warning( + f"'npm' command not found or not executable. UI development server will not be started. 
Error: {npm_check.stderr}" + ) + return + + ui_dir = REPO_ROOT / "llama_stack" / "ui" + logs_dir = Path("~/.llama/ui/logs").expanduser() + try: + # Create logs directory if it doesn't exist + logs_dir.mkdir(parents=True, exist_ok=True) + + ui_stdout_log_path = logs_dir / "stdout.log" + ui_stderr_log_path = logs_dir / "stderr.log" + + # Open log files in append mode + stdout_log_file = open(ui_stdout_log_path, "a") + stderr_log_file = open(ui_stderr_log_path, "a") + + process = subprocess.Popen( + ["npm", "run", "dev"], + cwd=str(ui_dir), + stdout=stdout_log_file, + stderr=stderr_log_file, + env={**os.environ, "NEXT_PUBLIC_LLAMA_STACK_BASE_URL": f"http://localhost:{stack_server_port}"}, + ) + logger.info(f"UI development server process started in {ui_dir} with PID {process.pid}.") + logger.info(f"Logs: stdout -> {ui_stdout_log_path}, stderr -> {ui_stderr_log_path}") + logger.info(f"UI will be available at http://localhost:{os.getenv('LLAMA_STACK_UI_PORT', 8322)}") + + except FileNotFoundError: + logger.error( + "Failed to start UI development server: 'npm' command not found. Make sure npm is installed and in your PATH." + ) + except Exception as e: + logger.error(f"Failed to start UI development server in {ui_dir}: {e}") diff --git a/llama_stack/cli/stack/stack.py b/llama_stack/cli/stack/stack.py index ccf1a5ffc..3aff78e23 100644 --- a/llama_stack/cli/stack/stack.py +++ b/llama_stack/cli/stack/stack.py @@ -7,12 +7,14 @@ import argparse from importlib.metadata import version +from llama_stack.cli.stack.list_stacks import StackListBuilds from llama_stack.cli.stack.utils import print_subcommand_description from llama_stack.cli.subcommand import Subcommand from .build import StackBuild from .list_apis import StackListApis from .list_providers import StackListProviders +from .remove import StackRemove from .run import StackRun @@ -41,5 +43,6 @@ class StackParser(Subcommand): StackListApis.create(subparsers) StackListProviders.create(subparsers) StackRun.create(subparsers) - + StackRemove.create(subparsers) + StackListBuilds.create(subparsers) print_subcommand_description(self.parser, subparsers) diff --git a/llama_stack/cli/table.py b/llama_stack/cli/table.py index bf59e6103..86c3adff2 100644 --- a/llama_stack/cli/table.py +++ b/llama_stack/cli/table.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from typing import Iterable +from collections.abc import Iterable from rich.console import Console from rich.table import Table diff --git a/llama_stack/cli/verify_download.py b/llama_stack/cli/verify_download.py index 1229e8601..3a1af3cbc 100644 --- a/llama_stack/cli/verify_download.py +++ b/llama_stack/cli/verify_download.py @@ -9,7 +9,6 @@ import hashlib from dataclasses import dataclass from functools import partial from pathlib import Path -from typing import Dict, List, Optional from rich.console import Console from rich.progress import Progress, SpinnerColumn, TextColumn @@ -21,7 +20,7 @@ from llama_stack.cli.subcommand import Subcommand class VerificationResult: filename: str expected_hash: str - actual_hash: Optional[str] + actual_hash: str | None exists: bool matches: bool @@ -60,9 +59,9 @@ def calculate_md5(filepath: Path, chunk_size: int = 8192) -> str: return md5_hash.hexdigest() -def load_checksums(checklist_path: Path) -> Dict[str, str]: +def load_checksums(checklist_path: Path) -> dict[str, str]: checksums = {} - with open(checklist_path, "r") as f: + with open(checklist_path) as f: for line in f: if line.strip(): md5sum, filepath = line.strip().split(" ", 1) @@ -72,7 +71,7 @@ def load_checksums(checklist_path: Path) -> Dict[str, str]: return checksums -def verify_files(model_dir: Path, checksums: Dict[str, str], console: Console) -> List[VerificationResult]: +def verify_files(model_dir: Path, checksums: dict[str, str], console: Console) -> list[VerificationResult]: results = [] with Progress( diff --git a/llama_stack/distribution/access_control.py b/llama_stack/distribution/access_control.py index 0651ab6eb..d560ec80f 100644 --- a/llama_stack/distribution/access_control.py +++ b/llama_stack/distribution/access_control.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict, Optional +from typing import Any from llama_stack.distribution.datatypes import AccessAttributes from llama_stack.log import get_logger @@ -14,8 +14,8 @@ logger = get_logger(__name__, category="core") def check_access( obj_identifier: str, - obj_attributes: Optional[AccessAttributes], - user_attributes: Optional[Dict[str, Any]] = None, + obj_attributes: AccessAttributes | None, + user_attributes: dict[str, Any] | None = None, ) -> bool: """Check if the current user has access to the given object, based on access attributes. 
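# A minimal usage sketch for the updated check_access() helper above, which now
# takes `AccessAttributes | None` and `dict[str, Any] | None` instead of the
# typing.Optional/Dict spellings. The object identifier and the attribute values
# below are made-up examples, not taken from the patch.
from llama_stack.distribution.access_control import check_access
from llama_stack.distribution.datatypes import AccessAttributes

# Attributes attached to a routable object (e.g. a registered model).
obj_attributes = AccessAttributes(roles=["data-scientist"], projects=["llama-3"])

# Attributes extracted from the authenticated caller.
user_attributes = {"roles": ["data-scientist"], "projects": ["llama-3", "customer-insights"]}

allowed = check_access("model::example-model", obj_attributes, user_attributes)
print("access granted" if allowed else "access denied")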
diff --git a/llama_stack/distribution/build.py b/llama_stack/distribution/build.py index a8ee372da..072f9c425 100644 --- a/llama_stack/distribution/build.py +++ b/llama_stack/distribution/build.py @@ -6,17 +6,18 @@ import importlib.resources import logging +import sys from pathlib import Path -from typing import Dict, List from pydantic import BaseModel from termcolor import cprint -from llama_stack.distribution.datatypes import BuildConfig, Provider +from llama_stack.distribution.datatypes import BuildConfig from llama_stack.distribution.distribution import get_provider_registry from llama_stack.distribution.utils.exec import run_command from llama_stack.distribution.utils.image_types import LlamaStackImageType from llama_stack.providers.datatypes import Api +from llama_stack.templates.template import DistributionTemplate log = logging.getLogger(__name__) @@ -37,19 +38,35 @@ class ApiInput(BaseModel): def get_provider_dependencies( - config_providers: Dict[str, List[Provider]], + config: BuildConfig | DistributionTemplate, ) -> tuple[list[str], list[str]]: """Get normal and special dependencies from provider configuration.""" - all_providers = get_provider_registry() - deps = [] + # Extract providers based on config type + if isinstance(config, DistributionTemplate): + providers = config.providers - for api_str, provider_or_providers in config_providers.items(): - providers_for_api = all_providers[Api(api_str)] + # TODO: This is a hack to get the dependencies for internal APIs into build + # We should have a better way to do this by formalizing the concept of "internal" APIs + # and providers, with a way to specify dependencies for them. + run_configs = config.run_configs + additional_pip_packages: list[str] = [] + if run_configs: + for run_config in run_configs.values(): + run_config_ = run_config.run_config(name="", providers={}, container_image=None) + if run_config_.inference_store: + additional_pip_packages.extend(run_config_.inference_store.pip_packages) + elif isinstance(config, BuildConfig): + providers = config.distribution_spec.providers + additional_pip_packages = config.additional_pip_packages + deps = [] + registry = get_provider_registry(config) + for api_str, provider_or_providers in providers.items(): + providers_for_api = registry[Api(api_str)] providers = provider_or_providers if isinstance(provider_or_providers, list) else [provider_or_providers] for provider in providers: - # Providers from BuildConfig and RunConfig are subtly different – not great + # Providers from BuildConfig and RunConfig are subtly different - not great provider_type = provider if isinstance(provider, str) else provider.provider_type if provider_type not in providers_for_api: @@ -68,18 +85,22 @@ def get_provider_dependencies( else: normal_deps.append(package) + if additional_pip_packages: + normal_deps.extend(additional_pip_packages) + return list(set(normal_deps)), list(set(special_deps)) -def print_pip_install_help(providers: Dict[str, List[Provider]]): - normal_deps, special_deps = get_provider_dependencies(providers) +def print_pip_install_help(config: BuildConfig): + normal_deps, special_deps = get_provider_dependencies(config) cprint( f"Please install needed dependencies using the following commands:\n\nuv pip install {' '.join(normal_deps)}", - "yellow", + color="yellow", + file=sys.stderr, ) for special_dep in special_deps: - cprint(f"uv pip install {special_dep}", "yellow") + cprint(f"uv pip install {special_dep}", color="yellow", file=sys.stderr) print() @@ -88,10 +109,11 @@ def 
build_image( build_file_path: Path, image_name: str, template_or_config: str, + run_config: str | None = None, ): container_base = build_config.distribution_spec.container_image or "python:3.10-slim" - normal_deps, special_deps = get_provider_dependencies(build_config.distribution_spec.providers) + normal_deps, special_deps = get_provider_dependencies(build_config) normal_deps += SERVER_DEPENDENCIES if build_config.image_type == LlamaStackImageType.CONTAINER.value: @@ -103,6 +125,11 @@ def build_image( container_base, " ".join(normal_deps), ] + + # When building from a config file (not a template), include the run config path in the + # build arguments + if run_config is not None: + args.append(run_config) elif build_config.image_type == LlamaStackImageType.CONDA.value: script = str(importlib.resources.files("llama_stack") / "distribution/build_conda_env.sh") args = [ diff --git a/llama_stack/distribution/build_container.sh b/llama_stack/distribution/build_container.sh index ed83b7bff..c128729e1 100755 --- a/llama_stack/distribution/build_container.sh +++ b/llama_stack/distribution/build_container.sh @@ -19,12 +19,16 @@ UV_HTTP_TIMEOUT=${UV_HTTP_TIMEOUT:-500} # mounting is not supported by docker buildx, so we use COPY instead USE_COPY_NOT_MOUNT=${USE_COPY_NOT_MOUNT:-} +# Path to the run.yaml file in the container +RUN_CONFIG_PATH=/app/run.yaml + +BUILD_CONTEXT_DIR=$(pwd) + if [ "$#" -lt 4 ]; then # This only works for templates - echo "Usage: $0 []" >&2 + echo "Usage: $0 [] []" >&2 exit 1 fi - set -euo pipefail template_or_config="$1" @@ -35,8 +39,27 @@ container_base="$1" shift pip_dependencies="$1" shift -special_pip_deps="${1:-}" +# Handle optional arguments +run_config="" +special_pip_deps="" + +# Check if there are more arguments +# The logics is becoming cumbersom, we should refactor it if we can do better +if [ $# -gt 0 ]; then + # Check if the argument ends with .yaml + if [[ "$1" == *.yaml ]]; then + run_config="$1" + shift + # If there's another argument after .yaml, it must be special_pip_deps + if [ $# -gt 0 ]; then + special_pip_deps="$1" + fi + else + # If it's not .yaml, it must be special_pip_deps + special_pip_deps="$1" + fi +fi # Define color codes RED='\033[0;31m' @@ -72,9 +95,13 @@ if [[ $container_base == *"registry.access.redhat.com/ubi9"* ]]; then FROM $container_base WORKDIR /app -RUN dnf -y update && dnf install -y iputils net-tools wget \ +# We install the Python 3.11 dev headers and build tools so that any +# C‑extension wheels (e.g. polyleven, faiss‑cpu) can compile successfully. + +RUN dnf -y update && dnf install -y iputils git net-tools wget \ vim-minimal python3.11 python3.11-pip python3.11-wheel \ - python3.11-setuptools && ln -s /bin/pip3.11 /bin/pip && ln -s /bin/python3.11 /bin/python && dnf clean all + python3.11-setuptools python3.11-devel gcc make && \ + ln -s /bin/pip3.11 /bin/pip && ln -s /bin/python3.11 /bin/python && dnf clean all ENV UV_SYSTEM_PYTHON=1 RUN pip install uv @@ -86,7 +113,7 @@ WORKDIR /app RUN apt-get update && apt-get install -y \ iputils-ping net-tools iproute2 dnsutils telnet \ - curl wget telnet \ + curl wget telnet git\ procps psmisc lsof \ traceroute \ bubblewrap \ @@ -115,6 +142,53 @@ EOF done fi +# Function to get Python command +get_python_cmd() { + if is_command_available python; then + echo "python" + elif is_command_available python3; then + echo "python3" + else + echo "Error: Neither python nor python3 is installed. Please install Python to continue." 
>&2 + exit 1 + fi +} + +# Add other required item commands generic to all containers +add_to_container << EOF +# Allows running as non-root user +RUN mkdir -p /.llama/providers.d /.cache +EOF + +if [ -n "$run_config" ]; then + # Copy the run config to the build context since it's an absolute path + cp "$run_config" "$BUILD_CONTEXT_DIR/run.yaml" + add_to_container << EOF +COPY run.yaml $RUN_CONFIG_PATH +EOF + + # Parse the run.yaml configuration to identify external provider directories + # If external providers are specified, copy their directory to the container + # and update the configuration to reference the new container path + python_cmd=$(get_python_cmd) + external_providers_dir=$($python_cmd -c "import yaml; config = yaml.safe_load(open('$run_config')); print(config.get('external_providers_dir') or '')") + external_providers_dir=$(eval echo "$external_providers_dir") + if [ -n "$external_providers_dir" ] && [ -d "$external_providers_dir" ]; then + echo "Copying external providers directory: $external_providers_dir" + cp -r "$external_providers_dir" "$BUILD_CONTEXT_DIR/providers.d" + add_to_container << EOF +COPY providers.d /.llama/providers.d +EOF + # Edit the run.yaml file to change the external_providers_dir to /.llama/providers.d + if [ "$(uname)" = "Darwin" ]; then + sed -i.bak -e 's|external_providers_dir:.*|external_providers_dir: /.llama/providers.d|' "$BUILD_CONTEXT_DIR/run.yaml" + rm -f "$BUILD_CONTEXT_DIR/run.yaml.bak" + else + sed -i 's|external_providers_dir:.*|external_providers_dir: /.llama/providers.d|' "$BUILD_CONTEXT_DIR/run.yaml" + fi + fi +fi + stack_mount="/app/llama-stack-source" client_mount="/app/llama-stack-client-source" @@ -174,23 +248,21 @@ fi RUN pip uninstall -y uv EOF -# if template_or_config ends with .yaml, it is not a template and we should not use the --template flag -if [[ "$template_or_config" != *.yaml ]]; then +# If a run config is provided, we use the --config flag +if [[ -n "$run_config" ]]; then + add_to_container << EOF +ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--config", "$RUN_CONFIG_PATH"] +EOF +# If a template is provided (not a yaml file), we use the --template flag +elif [[ "$template_or_config" != *.yaml ]]; then add_to_container << EOF ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--template", "$template_or_config"] EOF -else - add_to_container << EOF -ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server"] -EOF fi # Add other require item commands genearic to all containers add_to_container << EOF -# Allows running as non-root user -RUN mkdir -p /.llama /.cache - RUN chmod -R g+rw /app /.llama /.cache EOF @@ -254,9 +326,10 @@ $CONTAINER_BINARY build \ "${CLI_ARGS[@]}" \ -t "$image_tag" \ -f "$TEMP_DIR/Containerfile" \ - "." + "$BUILD_CONTEXT_DIR" # clean up tmp/configs +rm -f "$BUILD_CONTEXT_DIR/run.yaml" set +x echo "Success!" 
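# A minimal sketch of the positional argument order build_container.sh now expects:
# four required arguments, then an optional run.yaml path (detected by its .yaml
# suffix), then optional special pip dependencies. This mirrors how build_image()
# above appends `run_config` for container builds; the script path, helper name,
# and literal values below are made-up examples and the exact positional order is
# an assumption based on the script's argument parsing.
import shlex


def container_build_argv(
    template_or_config: str,
    image_name: str,
    container_base: str,
    pip_dependencies: list[str],
    run_config: str | None = None,
    special_pip_deps: str | None = None,
) -> list[str]:
    argv = [
        "llama_stack/distribution/build_container.sh",
        template_or_config,
        image_name,
        container_base,
        " ".join(pip_dependencies),
    ]
    # The run config must precede special_pip_deps so the script's
    # "ends with .yaml" check can tell the two optional arguments apart.
    if run_config is not None:
        argv.append(run_config)
    if special_pip_deps is not None:
        argv.append(special_pip_deps)
    return argv


print(shlex.join(container_build_argv(
    "my-build.yaml",
    "distribution-mystack:dev",
    "python:3.10-slim",
    ["fastapi", "uvicorn"],
    run_config="/home/user/.llama/distributions/mystack/mystack-run.yaml",
)))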
diff --git a/llama_stack/distribution/client.py b/llama_stack/distribution/client.py index 1925b864f..03e4fb051 100644 --- a/llama_stack/distribution/client.py +++ b/llama_stack/distribution/client.py @@ -6,9 +6,10 @@ import inspect import json +import sys from collections.abc import AsyncIterator from enum import Enum -from typing import Any, Type, Union, get_args, get_origin +from typing import Any, Union, get_args, get_origin import httpx from pydantic import BaseModel, parse_obj_as @@ -27,7 +28,7 @@ async def get_client_impl(protocol, config: RemoteProviderConfig, _deps: Any): return impl -def create_api_client_class(protocol) -> Type: +def create_api_client_class(protocol) -> type: if protocol in _CLIENT_CLASSES: return _CLIENT_CLASSES[protocol] @@ -96,13 +97,13 @@ def create_api_client_class(protocol) -> Type: try: data = json.loads(data) if "error" in data: - cprint(data, "red") + cprint(data, color="red", file=sys.stderr) continue yield parse_obj_as(return_type, data) except Exception as e: - print(f"Error with parsing or validation: {e}") - print(data) + cprint(f"Error with parsing or validation: {e}", color="red", file=sys.stderr) + cprint(data, color="red", file=sys.stderr) def httpx_request_params(self, method_name: str, *args, **kwargs) -> dict: webmethod, sig = self.routes[method_name] diff --git a/llama_stack/distribution/common.sh b/llama_stack/distribution/common.sh index 15220048b..5f764bcca 100755 --- a/llama_stack/distribution/common.sh +++ b/llama_stack/distribution/common.sh @@ -1,3 +1,5 @@ +#!/usr/bin/env bash + # Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. # diff --git a/llama_stack/distribution/configure.py b/llama_stack/distribution/configure.py index 2a3bf7053..e58ea0338 100644 --- a/llama_stack/distribution/configure.py +++ b/llama_stack/distribution/configure.py @@ -5,7 +5,7 @@ # the root directory of this source tree. 
import logging import textwrap -from typing import Any, Dict +from typing import Any from llama_stack.distribution.datatypes import ( LLAMA_STACK_RUN_CONFIG_VERSION, @@ -17,6 +17,7 @@ from llama_stack.distribution.distribution import ( builtin_automatically_routed_apis, get_provider_registry, ) +from llama_stack.distribution.utils.config_dirs import EXTERNAL_PROVIDERS_DIR from llama_stack.distribution.utils.dynamic import instantiate_class_type from llama_stack.distribution.utils.prompt_for_config import prompt_for_config from llama_stack.providers.datatypes import Api, ProviderSpec @@ -24,7 +25,7 @@ from llama_stack.providers.datatypes import Api, ProviderSpec logger = logging.getLogger(__name__) -def configure_single_provider(registry: Dict[str, ProviderSpec], provider: Provider) -> Provider: +def configure_single_provider(registry: dict[str, ProviderSpec], provider: Provider) -> Provider: provider_spec = registry[provider.provider_type] config_type = instantiate_class_type(provider_spec.config_class) try: @@ -73,11 +74,7 @@ def configure_api_providers(config: StackRunConfig, build_spec: DistributionSpec existing_providers = config.providers.get(api_str, []) if existing_providers: - logger.info( - f"Re-configuring existing providers for API `{api_str}`...", - "green", - attrs=["bold"], - ) + logger.info(f"Re-configuring existing providers for API `{api_str}`...") updated_providers = [] for p in existing_providers: logger.info(f"> Configuring provider `({p.provider_type})`") @@ -91,7 +88,7 @@ def configure_api_providers(config: StackRunConfig, build_spec: DistributionSpec if not plist: raise ValueError(f"No provider configured for API {api_str}?") - logger.info(f"Configuring API `{api_str}`...", "green", attrs=["bold"]) + logger.info(f"Configuring API `{api_str}`...") updated_providers = [] for i, provider_type in enumerate(plist): if i >= 1: @@ -120,8 +117,8 @@ def configure_api_providers(config: StackRunConfig, build_spec: DistributionSpec def upgrade_from_routing_table( - config_dict: Dict[str, Any], -) -> Dict[str, Any]: + config_dict: dict[str, Any], +) -> dict[str, Any]: def get_providers(entries): return [ Provider( @@ -163,7 +160,7 @@ def upgrade_from_routing_table( return config_dict -def parse_and_maybe_upgrade_config(config_dict: Dict[str, Any]) -> StackRunConfig: +def parse_and_maybe_upgrade_config(config_dict: dict[str, Any]) -> StackRunConfig: version = config_dict.get("version", None) if version == LLAMA_STACK_RUN_CONFIG_VERSION: return StackRunConfig(**config_dict) @@ -174,4 +171,7 @@ def parse_and_maybe_upgrade_config(config_dict: Dict[str, Any]) -> StackRunConfi config_dict["version"] = LLAMA_STACK_RUN_CONFIG_VERSION + if not config_dict.get("external_providers_dir", None): + config_dict["external_providers_dir"] = EXTERNAL_PROVIDERS_DIR + return StackRunConfig(**config_dict) diff --git a/llama_stack/distribution/datatypes.py b/llama_stack/distribution/datatypes.py index b24b0ec50..def7048c0 100644 --- a/llama_stack/distribution/datatypes.py +++ b/llama_stack/distribution/datatypes.py @@ -4,9 +4,11 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from typing import Annotated, Any, Dict, List, Optional, Union +from enum import Enum +from pathlib import Path +from typing import Annotated, Any -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, field_validator from llama_stack.apis.benchmarks import Benchmark, BenchmarkInput from llama_stack.apis.datasetio import DatasetIO @@ -23,13 +25,14 @@ from llama_stack.apis.tools import Tool, ToolGroup, ToolGroupInput, ToolRuntime from llama_stack.apis.vector_dbs import VectorDB, VectorDBInput from llama_stack.apis.vector_io import VectorIO from llama_stack.providers.datatypes import Api, ProviderSpec -from llama_stack.providers.utils.kvstore.config import KVStoreConfig +from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig +from llama_stack.providers.utils.sqlstore.sqlstore import SqlStoreConfig LLAMA_STACK_BUILD_CONFIG_VERSION = "2" LLAMA_STACK_RUN_CONFIG_VERSION = "2" -RoutingKey = Union[str, List[str]] +RoutingKey = str | list[str] class AccessAttributes(BaseModel): @@ -46,17 +49,17 @@ class AccessAttributes(BaseModel): """ # Standard attribute categories - the minimal set we need now - roles: Optional[List[str]] = Field( + roles: list[str] | None = Field( default=None, description="Role-based attributes (e.g., 'admin', 'data-scientist', 'user')" ) - teams: Optional[List[str]] = Field(default=None, description="Team-based attributes (e.g., 'ml-team', 'nlp-team')") + teams: list[str] | None = Field(default=None, description="Team-based attributes (e.g., 'ml-team', 'nlp-team')") - projects: Optional[List[str]] = Field( + projects: list[str] | None = Field( default=None, description="Project-based access attributes (e.g., 'llama-3', 'customer-insights')" ) - namespaces: Optional[List[str]] = Field( + namespaces: list[str] | None = Field( default=None, description="Namespace-based access control for resource isolation" ) @@ -105,7 +108,7 @@ class ResourceWithACL(Resource): # ^ User must have access to the customer-insights project AND have confidential namespace """ - access_attributes: Optional[AccessAttributes] = None + access_attributes: AccessAttributes | None = None # Use the extended Resource for all routable objects @@ -141,41 +144,21 @@ class ToolGroupWithACL(ToolGroup, ResourceWithACL): pass -RoutableObject = Union[ - Model, - Shield, - VectorDB, - Dataset, - ScoringFn, - Benchmark, - Tool, - ToolGroup, -] - +RoutableObject = Model | Shield | VectorDB | Dataset | ScoringFn | Benchmark | Tool | ToolGroup RoutableObjectWithProvider = Annotated[ - Union[ - ModelWithACL, - ShieldWithACL, - VectorDBWithACL, - DatasetWithACL, - ScoringFnWithACL, - BenchmarkWithACL, - ToolWithACL, - ToolGroupWithACL, - ], + ModelWithACL + | ShieldWithACL + | VectorDBWithACL + | DatasetWithACL + | ScoringFnWithACL + | BenchmarkWithACL + | ToolWithACL + | ToolGroupWithACL, Field(discriminator="type"), ] -RoutedProtocol = Union[ - Inference, - Safety, - VectorIO, - DatasetIO, - Scoring, - Eval, - ToolRuntime, -] +RoutedProtocol = Inference | Safety | VectorIO | DatasetIO | Scoring | Eval | ToolRuntime # Example: /inference, /safety @@ -183,15 +166,15 @@ class AutoRoutedProviderSpec(ProviderSpec): provider_type: str = "router" config_class: str = "" - container_image: Optional[str] = None + container_image: str | None = None routing_table_api: Api module: str - provider_data_validator: Optional[str] = Field( + provider_data_validator: str | None = Field( default=None, ) @property - def pip_packages(self) -> List[str]: + def pip_packages(self) -> 
list[str]: raise AssertionError("Should not be called on AutoRoutedProviderSpec") @@ -199,20 +182,20 @@ class AutoRoutedProviderSpec(ProviderSpec): class RoutingTableProviderSpec(ProviderSpec): provider_type: str = "routing_table" config_class: str = "" - container_image: Optional[str] = None + container_image: str | None = None router_api: Api module: str - pip_packages: List[str] = Field(default_factory=list) + pip_packages: list[str] = Field(default_factory=list) class DistributionSpec(BaseModel): - description: Optional[str] = Field( + description: str | None = Field( default="", description="Description of the distribution", ) - container_image: Optional[str] = None - providers: Dict[str, Union[str, List[str]]] = Field( + container_image: str | None = None + providers: dict[str, str | list[str]] = Field( default_factory=dict, description=""" Provider Types for each of the APIs provided by this distribution. If you @@ -224,22 +207,50 @@ in the runtime configuration to help route to the correct provider.""", class Provider(BaseModel): provider_id: str provider_type: str - config: Dict[str, Any] + config: dict[str, Any] class LoggingConfig(BaseModel): - category_levels: Dict[str, str] = Field( - default_factory=Dict, + category_levels: dict[str, str] = Field( + default_factory=dict, description=""" Dictionary of different logging configurations for different portions (ex: core, server) of llama stack""", ) +class AuthProviderType(str, Enum): + """Supported authentication provider types.""" + + OAUTH2_TOKEN = "oauth2_token" + CUSTOM = "custom" + + class AuthenticationConfig(BaseModel): - endpoint: str = Field( + provider_type: AuthProviderType = Field( ..., - description="Endpoint URL to validate authentication tokens", + description="Type of authentication provider", ) + config: dict[str, Any] = Field( + ..., + description="Provider-specific configuration", + ) + + +class AuthenticationRequiredError(Exception): + pass + + +class QuotaPeriod(str, Enum): + DAY = "day" + + +class QuotaConfig(BaseModel): + kvstore: SqliteKVStoreConfig = Field(description="Config for KV store backend (SQLite only for now)") + anonymous_max_requests: int = Field(default=100, description="Max requests for unauthenticated clients per period") + authenticated_max_requests: int = Field( + default=1000, description="Max requests for authenticated clients per period" + ) + period: QuotaPeriod = Field(default=QuotaPeriod.DAY, description="Quota period to set") class ServerConfig(BaseModel): @@ -249,18 +260,30 @@ class ServerConfig(BaseModel): ge=1024, le=65535, ) - tls_certfile: Optional[str] = Field( + tls_certfile: str | None = Field( default=None, description="Path to TLS certificate file for HTTPS", ) - tls_keyfile: Optional[str] = Field( + tls_keyfile: str | None = Field( default=None, description="Path to TLS key file for HTTPS", ) - auth: Optional[AuthenticationConfig] = Field( + tls_cafile: str | None = Field( + default=None, + description="Path to TLS CA file for HTTPS with mutual TLS authentication", + ) + auth: AuthenticationConfig | None = Field( default=None, description="Authentication configuration for the server", ) + host: str | None = Field( + default=None, + description="The host the server should listen on", + ) + quota: QuotaConfig | None = Field( + default=None, + description="Per client quota request configuration", + ) class StackRunConfig(BaseModel): @@ -273,50 +296,66 @@ Reference to the distribution this package refers to. 
For unregistered (adhoc) p this could be just a hash """, ) - container_image: Optional[str] = Field( + container_image: str | None = Field( default=None, description="Reference to the container image if this package refers to a container", ) - apis: List[str] = Field( + apis: list[str] = Field( default_factory=list, description=""" The list of APIs to serve. If not specified, all APIs specified in the provider_map will be served""", ) - providers: Dict[str, List[Provider]] = Field( + providers: dict[str, list[Provider]] = Field( description=""" One or more providers to use for each API. The same provider_type (e.g., meta-reference) can be instantiated multiple times (with different configs) if necessary. """, ) - metadata_store: Optional[KVStoreConfig] = Field( + metadata_store: KVStoreConfig | None = Field( default=None, description=""" Configuration for the persistence store used by the distribution registry. If not specified, a default SQLite store will be used.""", ) - # registry of "resources" in the distribution - models: List[ModelInput] = Field(default_factory=list) - shields: List[ShieldInput] = Field(default_factory=list) - vector_dbs: List[VectorDBInput] = Field(default_factory=list) - datasets: List[DatasetInput] = Field(default_factory=list) - scoring_fns: List[ScoringFnInput] = Field(default_factory=list) - benchmarks: List[BenchmarkInput] = Field(default_factory=list) - tool_groups: List[ToolGroupInput] = Field(default_factory=list) + inference_store: SqlStoreConfig | None = Field( + default=None, + description=""" +Configuration for the persistence store used by the inference API. If not specified, +a default SQLite store will be used.""", + ) - logging: Optional[LoggingConfig] = Field(default=None, description="Configuration for Llama Stack Logging") + # registry of "resources" in the distribution + models: list[ModelInput] = Field(default_factory=list) + shields: list[ShieldInput] = Field(default_factory=list) + vector_dbs: list[VectorDBInput] = Field(default_factory=list) + datasets: list[DatasetInput] = Field(default_factory=list) + scoring_fns: list[ScoringFnInput] = Field(default_factory=list) + benchmarks: list[BenchmarkInput] = Field(default_factory=list) + tool_groups: list[ToolGroupInput] = Field(default_factory=list) + + logging: LoggingConfig | None = Field(default=None, description="Configuration for Llama Stack Logging") server: ServerConfig = Field( default_factory=ServerConfig, description="Configuration for the HTTP(S) server", ) - external_providers_dir: Optional[str] = Field( + external_providers_dir: Path | None = Field( default=None, description="Path to directory containing external provider implementations. The providers code and dependencies must be installed on the system.", ) + @field_validator("external_providers_dir") + @classmethod + def validate_external_providers_dir(cls, v): + if v is None: + return None + if isinstance(v, str): + return Path(v) + return v + class BuildConfig(BaseModel): version: str = LLAMA_STACK_BUILD_CONFIG_VERSION @@ -326,3 +365,25 @@ class BuildConfig(BaseModel): default="conda", description="Type of package to build (conda | container | venv)", ) + image_name: str | None = Field( + default=None, + description="Name of the distribution to build", + ) + external_providers_dir: Path | None = Field( + default=None, + description="Path to directory containing external provider implementations. The providers packages will be resolved from this directory. 
" + "pip_packages MUST contain the provider package name.", + ) + additional_pip_packages: list[str] = Field( + default_factory=list, + description="Additional pip packages to install in the distribution. These packages will be installed in the distribution environment.", + ) + + @field_validator("external_providers_dir") + @classmethod + def validate_external_providers_dir(cls, v): + if v is None: + return None + if isinstance(v, str): + return Path(v) + return v diff --git a/llama_stack/distribution/distribution.py b/llama_stack/distribution/distribution.py index d4447139c..b860d15ab 100644 --- a/llama_stack/distribution/distribution.py +++ b/llama_stack/distribution/distribution.py @@ -7,12 +7,11 @@ import glob import importlib import os -from typing import Any, Dict, List +from typing import Any import yaml from pydantic import BaseModel -from llama_stack.distribution.datatypes import StackRunConfig from llama_stack.log import get_logger from llama_stack.providers.datatypes import ( AdapterSpec, @@ -25,7 +24,7 @@ from llama_stack.providers.datatypes import ( logger = get_logger(name=__name__, category="core") -def stack_apis() -> List[Api]: +def stack_apis() -> list[Api]: return list(Api) @@ -34,7 +33,7 @@ class AutoRoutedApiInfo(BaseModel): router_api: Api -def builtin_automatically_routed_apis() -> List[AutoRoutedApiInfo]: +def builtin_automatically_routed_apis() -> list[AutoRoutedApiInfo]: return [ AutoRoutedApiInfo( routing_table_api=Api.models, @@ -67,12 +66,12 @@ def builtin_automatically_routed_apis() -> List[AutoRoutedApiInfo]: ] -def providable_apis() -> List[Api]: +def providable_apis() -> list[Api]: routing_table_apis = {x.routing_table_api for x in builtin_automatically_routed_apis()} return [api for api in Api if api not in routing_table_apis and api != Api.inspect and api != Api.providers] -def _load_remote_provider_spec(spec_data: Dict[str, Any], api: Api) -> ProviderSpec: +def _load_remote_provider_spec(spec_data: dict[str, Any], api: Api) -> ProviderSpec: adapter = AdapterSpec(**spec_data["adapter"]) spec = remote_provider_spec( api=api, @@ -82,7 +81,7 @@ def _load_remote_provider_spec(spec_data: Dict[str, Any], api: Api) -> ProviderS return spec -def _load_inline_provider_spec(spec_data: Dict[str, Any], api: Api, provider_name: str) -> ProviderSpec: +def _load_inline_provider_spec(spec_data: dict[str, Any], api: Api, provider_name: str) -> ProviderSpec: spec = InlineProviderSpec( api=api, provider_type=f"inline::{provider_name}", @@ -97,7 +96,9 @@ def _load_inline_provider_spec(spec_data: Dict[str, Any], api: Api, provider_nam return spec -def get_provider_registry(config: StackRunConfig | None = None) -> Dict[Api, Dict[str, ProviderSpec]]: +def get_provider_registry( + config=None, +) -> dict[Api, dict[str, ProviderSpec]]: """Get the provider registry, optionally including external providers. This function loads both built-in providers and external providers from YAML files. 
@@ -122,7 +123,7 @@ def get_provider_registry(config: StackRunConfig | None = None) -> Dict[Api, Dic llama-guard.yaml Args: - config: Optional StackRunConfig containing the external providers directory path + config: Optional object containing the external providers directory path Returns: A dictionary mapping APIs to their available providers @@ -132,7 +133,7 @@ def get_provider_registry(config: StackRunConfig | None = None) -> Dict[Api, Dic ValueError: If any provider spec is invalid """ - ret: Dict[Api, Dict[str, ProviderSpec]] = {} + ret: dict[Api, dict[str, ProviderSpec]] = {} for api in providable_apis(): name = api.name.lower() logger.debug(f"Importing module {name}") @@ -142,8 +143,9 @@ def get_provider_registry(config: StackRunConfig | None = None) -> Dict[Api, Dic except ImportError as e: logger.warning(f"Failed to import module {name}: {e}") - if config and config.external_providers_dir: - external_providers_dir = os.path.abspath(config.external_providers_dir) + # Check if config has the external_providers_dir attribute + if config and hasattr(config, "external_providers_dir") and config.external_providers_dir: + external_providers_dir = os.path.abspath(os.path.expanduser(config.external_providers_dir)) if not os.path.exists(external_providers_dir): raise FileNotFoundError(f"External providers directory not found: {external_providers_dir}") logger.info(f"Loading external providers from {external_providers_dir}") diff --git a/llama_stack/distribution/inspect.py b/llama_stack/distribution/inspect.py index ba0ce5ea2..5822070ad 100644 --- a/llama_stack/distribution/inspect.py +++ b/llama_stack/distribution/inspect.py @@ -16,7 +16,8 @@ from llama_stack.apis.inspect import ( VersionInfo, ) from llama_stack.distribution.datatypes import StackRunConfig -from llama_stack.distribution.server.endpoints import get_all_api_endpoints +from llama_stack.distribution.server.routes import get_all_api_routes +from llama_stack.providers.datatypes import HealthStatus class DistributionInspectConfig(BaseModel): @@ -30,7 +31,7 @@ async def get_provider_impl(config, deps): class DistributionInspectImpl(Inspect): - def __init__(self, config, deps): + def __init__(self, config: DistributionInspectConfig, deps): self.config = config self.deps = deps @@ -38,27 +39,41 @@ class DistributionInspectImpl(Inspect): pass async def list_routes(self) -> ListRoutesResponse: - run_config = self.config.run_config + run_config: StackRunConfig = self.config.run_config ret = [] - all_endpoints = get_all_api_endpoints() + all_endpoints = get_all_api_routes() for api, endpoints in all_endpoints.items(): - providers = run_config.providers.get(api.value, []) - ret.extend( - [ - RouteInfo( - route=e.route, - method=e.method, - provider_types=[p.provider_type for p in providers], + # Always include provider and inspect APIs, filter others based on run config + if api.value in ["providers", "inspect"]: + ret.extend( + [ + RouteInfo( + route=e.path, + method=next(iter([m for m in e.methods if m != "HEAD"])), + provider_types=[], # These APIs don't have "real" providers - they're internal to the stack + ) + for e in endpoints + ] + ) + else: + providers = run_config.providers.get(api.value, []) + if providers: # Only process if there are providers for this API + ret.extend( + [ + RouteInfo( + route=e.path, + method=next(iter([m for m in e.methods if m != "HEAD"])), + provider_types=[p.provider_type for p in providers], + ) + for e in endpoints + ] ) - for e in endpoints - ] - ) return ListRoutesResponse(data=ret) async def 
health(self) -> HealthInfo: - return HealthInfo(status="OK") + return HealthInfo(status=HealthStatus.OK) async def version(self) -> VersionInfo: return VersionInfo(version=version("llama-stack")) diff --git a/llama_stack/distribution/library_client.py b/llama_stack/distribution/library_client.py index c0143363d..f32130cf9 100644 --- a/llama_stack/distribution/library_client.py +++ b/llama_stack/distribution/library_client.py @@ -9,10 +9,11 @@ import inspect import json import logging import os +import sys from concurrent.futures import ThreadPoolExecutor from enum import Enum from pathlib import Path -from typing import Any, Optional, TypeVar, Union, get_args, get_origin +from typing import Any, TypeVar, Union, get_args, get_origin import httpx import yaml @@ -30,22 +31,19 @@ from termcolor import cprint from llama_stack.distribution.build import print_pip_install_help from llama_stack.distribution.configure import parse_and_maybe_upgrade_config -from llama_stack.distribution.datatypes import Api +from llama_stack.distribution.datatypes import Api, BuildConfig, DistributionSpec from llama_stack.distribution.request_headers import ( PROVIDER_DATA_VAR, request_provider_data_context, ) from llama_stack.distribution.resolver import ProviderRegistry -from llama_stack.distribution.server.endpoints import ( - find_matching_endpoint, - initialize_endpoint_impls, -) +from llama_stack.distribution.server.routes import find_matching_route, initialize_route_impls from llama_stack.distribution.stack import ( construct_stack, get_stack_run_config_from_template, - redact_sensitive_fields, replace_env_vars, ) +from llama_stack.distribution.utils.config import redact_sensitive_fields from llama_stack.distribution.utils.context import preserve_contexts_async_generator from llama_stack.distribution.utils.exec import in_notebook from llama_stack.providers.utils.telemetry.tracing import ( @@ -119,8 +117,8 @@ class LlamaStackAsLibraryClient(LlamaStackClient): self, config_path_or_template_name: str, skip_logger_removal: bool = False, - custom_provider_registry: Optional[ProviderRegistry] = None, - provider_data: Optional[dict[str, Any]] = None, + custom_provider_registry: ProviderRegistry | None = None, + provider_data: dict[str, Any] | None = None, ): super().__init__() self.async_client = AsyncLlamaStackAsLibraryClient( @@ -181,8 +179,8 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient): def __init__( self, config_path_or_template_name: str, - custom_provider_registry: Optional[ProviderRegistry] = None, - provider_data: Optional[dict[str, Any]] = None, + custom_provider_registry: ProviderRegistry | None = None, + provider_data: dict[str, Any] | None = None, ): super().__init__() # when using the library client, we should not log to console since many @@ -207,22 +205,41 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient): async def initialize(self) -> bool: try: - self.endpoint_impls = None + self.route_impls = None self.impls = await construct_stack(self.config, self.custom_provider_registry) except ModuleNotFoundError as _e: - cprint(_e.msg, "red") + cprint(_e.msg, color="red", file=sys.stderr) cprint( "Using llama-stack as a library requires installing dependencies depending on the template (providers) you choose.\n", - "yellow", + color="yellow", + file=sys.stderr, ) if self.config_path_or_template_name.endswith(".yaml"): - print_pip_install_help(self.config.providers) + # Convert Provider objects to their types + provider_types: dict[str, str | list[str]] = {} + for api, providers 
in self.config.providers.items(): + types = [p.provider_type for p in providers] + # Convert single-item lists to strings + provider_types[api] = types[0] if len(types) == 1 else types + build_config = BuildConfig( + distribution_spec=DistributionSpec( + providers=provider_types, + ), + external_providers_dir=self.config.external_providers_dir, + ) + print_pip_install_help(build_config) else: prefix = "!" if in_notebook() else "" cprint( f"Please run:\n\n{prefix}llama stack build --template {self.config_path_or_template_name} --image-type venv\n\n", "yellow", + file=sys.stderr, ) + cprint( + "Please check your internet connection and try again.", + "red", + file=sys.stderr, + ) raise _e if Api.telemetry in self.impls: @@ -234,7 +251,7 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient): safe_config = redact_sensitive_fields(self.config.model_dump()) console.print(yaml.dump(safe_config, indent=2)) - self.endpoint_impls = initialize_endpoint_impls(self.impls) + self.route_impls = initialize_route_impls(self.impls) return True async def request( @@ -245,13 +262,15 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient): stream=False, stream_cls=None, ): - if not self.endpoint_impls: + if not self.route_impls: raise ValueError("Client not initialized") # Create headers with provider data if available - headers = {} + headers = options.headers or {} if self.provider_data: - headers["X-LlamaStack-Provider-Data"] = json.dumps(self.provider_data) + keys = ["X-LlamaStack-Provider-Data", "x-llamastack-provider-data"] + if all(key not in headers for key in keys): + headers["X-LlamaStack-Provider-Data"] = json.dumps(self.provider_data) # Use context manager for provider data with request_provider_data_context(headers): @@ -274,11 +293,14 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient): cast_to: Any, options: Any, ): + if self.route_impls is None: + raise ValueError("Client not initialized") + path = options.url body = options.params or {} body |= options.json_data or {} - matched_func, path_params, route = find_matching_endpoint(options.method, path, self.endpoint_impls) + matched_func, path_params, route = find_matching_route(options.method, path, self.route_impls) body |= path_params body = self._convert_body(path, options.method, body) await start_trace(route, {"__location__": "library_client"}) @@ -320,10 +342,13 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient): options: Any, stream_cls: Any, ): + if self.route_impls is None: + raise ValueError("Client not initialized") + path = options.url body = options.params or {} body |= options.json_data or {} - func, path_params, route = find_matching_endpoint(options.method, path, self.endpoint_impls) + func, path_params, route = find_matching_route(options.method, path, self.route_impls) body |= path_params body = self._convert_body(path, options.method, body) @@ -371,11 +396,14 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient): ) return await response.parse() - def _convert_body(self, path: str, method: str, body: Optional[dict] = None) -> dict: + def _convert_body(self, path: str, method: str, body: dict | None = None) -> dict: if not body: return {} - func, _, _ = find_matching_endpoint(method, path, self.endpoint_impls) + if self.route_impls is None: + raise ValueError("Client not initialized") + + func, _, _ = find_matching_route(method, path, self.route_impls) sig = inspect.signature(func) # Strip NOT_GIVENs to use the defaults in signature diff --git a/llama_stack/distribution/providers.py 
b/llama_stack/distribution/providers.py index cf9b0b975..29b7109dd 100644 --- a/llama_stack/distribution/providers.py +++ b/llama_stack/distribution/providers.py @@ -4,14 +4,17 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +import asyncio +from typing import Any from pydantic import BaseModel from llama_stack.apis.providers import ListProvidersResponse, ProviderInfo, Providers from llama_stack.log import get_logger +from llama_stack.providers.datatypes import HealthResponse, HealthStatus from .datatypes import StackRunConfig -from .stack import redact_sensitive_fields +from .utils.config import redact_sensitive_fields logger = get_logger(name=__name__, category="core") @@ -41,19 +44,24 @@ class ProviderImpl(Providers): async def list_providers(self) -> ListProvidersResponse: run_config = self.config.run_config safe_config = StackRunConfig(**redact_sensitive_fields(run_config.model_dump())) + providers_health = await self.get_providers_health() ret = [] for api, providers in safe_config.providers.items(): - ret.extend( - [ + for p in providers: + ret.append( ProviderInfo( api=api, provider_id=p.provider_id, provider_type=p.provider_type, config=p.config, + health=providers_health.get(api, {}).get( + p.provider_id, + HealthResponse( + status=HealthStatus.NOT_IMPLEMENTED, message="Provider does not implement health check" + ), + ), ) - for p in providers - ] - ) + ) return ListProvidersResponse(data=ret) @@ -64,3 +72,57 @@ class ProviderImpl(Providers): return p raise ValueError(f"Provider {provider_id} not found") + + async def get_providers_health(self) -> dict[str, dict[str, HealthResponse]]: + """Get health status for all providers. + + Returns: + Dict[str, Dict[str, HealthResponse]]: A dictionary mapping API names to provider health statuses. + Each API maps to a dictionary of provider IDs to their health responses. 
+ """ + providers_health: dict[str, dict[str, HealthResponse]] = {} + timeout = 1.0 + + async def check_provider_health(impl: Any) -> tuple[str, HealthResponse] | None: + # Skip special implementations (inspect/providers) that don't have provider specs + if not hasattr(impl, "__provider_spec__"): + return None + api_name = impl.__provider_spec__.api.name + if not hasattr(impl, "health"): + return ( + api_name, + HealthResponse( + status=HealthStatus.NOT_IMPLEMENTED, message="Provider does not implement health check" + ), + ) + + try: + health = await asyncio.wait_for(impl.health(), timeout=timeout) + return api_name, health + except (asyncio.TimeoutError, TimeoutError): + return ( + api_name, + HealthResponse( + status=HealthStatus.ERROR, message=f"Health check timed out after {timeout} seconds" + ), + ) + except Exception as e: + return ( + api_name, + HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}"), + ) + + # Create tasks for all providers + tasks = [check_provider_health(impl) for impl in self.deps.values()] + + # Wait for all health checks to complete + results = await asyncio.gather(*tasks) + + # Organize results by API and provider ID + for result in results: + if result is None: # Skip special implementations + continue + api_name, health_response = result + providers_health[api_name] = health_response + + return providers_health diff --git a/llama_stack/distribution/request_headers.py b/llama_stack/distribution/request_headers.py index f9cde2cdf..b03d2dee8 100644 --- a/llama_stack/distribution/request_headers.py +++ b/llama_stack/distribution/request_headers.py @@ -7,7 +7,8 @@ import contextvars import json import logging -from typing import Any, ContextManager, Dict, List, Optional +from contextlib import AbstractContextManager +from typing import Any from .utils.dynamic import instantiate_class_type @@ -17,11 +18,11 @@ log = logging.getLogger(__name__) PROVIDER_DATA_VAR = contextvars.ContextVar("provider_data", default=None) -class RequestProviderDataContext(ContextManager): +class RequestProviderDataContext(AbstractContextManager): """Context manager for request provider data""" def __init__( - self, provider_data: Optional[Dict[str, Any]] = None, auth_attributes: Optional[Dict[str, List[str]]] = None + self, provider_data: dict[str, Any] | None = None, auth_attributes: dict[str, list[str]] | None = None ): self.provider_data = provider_data or {} if auth_attributes: @@ -43,7 +44,8 @@ class RequestProviderDataContext(ContextManager): class NeedsRequestProviderData: def get_request_provider_data(self) -> Any: spec = self.__provider_spec__ - assert spec, f"Provider spec not set on {self.__class__}" + if not spec: + raise ValueError(f"Provider spec not set on {self.__class__}") provider_type = spec.provider_type validator_class = spec.provider_data_validator @@ -63,7 +65,7 @@ class NeedsRequestProviderData: return None -def parse_request_provider_data(headers: Dict[str, str]) -> Optional[Dict[str, Any]]: +def parse_request_provider_data(headers: dict[str, str]) -> dict[str, Any] | None: """Parse provider data from request headers""" keys = [ "X-LlamaStack-Provider-Data", @@ -86,14 +88,14 @@ def parse_request_provider_data(headers: Dict[str, str]) -> Optional[Dict[str, A def request_provider_data_context( - headers: Dict[str, str], auth_attributes: Optional[Dict[str, List[str]]] = None -) -> ContextManager: + headers: dict[str, str], auth_attributes: dict[str, list[str]] | None = None +) -> AbstractContextManager: """Context manager that sets 
request provider data from headers and auth attributes for the duration of the context""" provider_data = parse_request_provider_data(headers) return RequestProviderDataContext(provider_data, auth_attributes) -def get_auth_attributes() -> Optional[Dict[str, List[str]]]: +def get_auth_attributes() -> dict[str, list[str]] | None: """Helper to retrieve auth attributes from the provider data context""" provider_data = PROVIDER_DATA_VAR.get() if not provider_data: diff --git a/llama_stack/distribution/resolver.py b/llama_stack/distribution/resolver.py index 0de1e0a02..b7c7cb87f 100644 --- a/llama_stack/distribution/resolver.py +++ b/llama_stack/distribution/resolver.py @@ -5,7 +5,7 @@ # the root directory of this source tree. import importlib import inspect -from typing import Any, Dict, List, Set, Tuple +from typing import Any from llama_stack.apis.agents import Agents from llama_stack.apis.benchmarks import Benchmarks @@ -13,7 +13,7 @@ from llama_stack.apis.datasetio import DatasetIO from llama_stack.apis.datasets import Datasets from llama_stack.apis.eval import Eval from llama_stack.apis.files import Files -from llama_stack.apis.inference import Inference +from llama_stack.apis.inference import Inference, InferenceProvider from llama_stack.apis.inspect import Inspect from llama_stack.apis.models import Models from llama_stack.apis.post_training import PostTraining @@ -41,14 +41,13 @@ from llama_stack.providers.datatypes import ( Api, BenchmarksProtocolPrivate, DatasetsProtocolPrivate, - InlineProviderSpec, ModelsProtocolPrivate, ProviderSpec, RemoteProviderConfig, RemoteProviderSpec, ScoringFunctionsProtocolPrivate, ShieldsProtocolPrivate, - ToolsProtocolPrivate, + ToolGroupsProtocolPrivate, VectorDBsProtocolPrivate, ) @@ -59,7 +58,7 @@ class InvalidProviderError(Exception): pass -def api_protocol_map() -> Dict[Api, Any]: +def api_protocol_map() -> dict[Api, Any]: return { Api.providers: ProvidersAPI, Api.agents: Agents, @@ -84,10 +83,17 @@ def api_protocol_map() -> Dict[Api, Any]: } -def additional_protocols_map() -> Dict[Api, Any]: +def api_protocol_map_for_compliance_check() -> dict[Api, Any]: + return { + **api_protocol_map(), + Api.inference: InferenceProvider, + } + + +def additional_protocols_map() -> dict[Api, Any]: return { Api.inference: (ModelsProtocolPrivate, Models, Api.models), - Api.tool_groups: (ToolsProtocolPrivate, ToolGroups, Api.tool_groups), + Api.tool_groups: (ToolGroupsProtocolPrivate, ToolGroups, Api.tool_groups), Api.vector_io: (VectorDBsProtocolPrivate, VectorDBs, Api.vector_dbs), Api.safety: (ShieldsProtocolPrivate, Shields, Api.shields), Api.datasetio: (DatasetsProtocolPrivate, Datasets, Api.datasets), @@ -105,14 +111,14 @@ class ProviderWithSpec(Provider): spec: ProviderSpec -ProviderRegistry = Dict[Api, Dict[str, ProviderSpec]] +ProviderRegistry = dict[Api, dict[str, ProviderSpec]] async def resolve_impls( run_config: StackRunConfig, provider_registry: ProviderRegistry, dist_registry: DistributionRegistry, -) -> Dict[Api, Any]: +) -> dict[Api, Any]: """ Resolves provider implementations by: 1. Validating and organizing providers. 
@@ -134,10 +140,10 @@ async def resolve_impls( sorted_providers = sort_providers_by_deps(providers_with_specs, run_config) - return await instantiate_providers(sorted_providers, router_apis, dist_registry) + return await instantiate_providers(sorted_providers, router_apis, dist_registry, run_config) -def specs_for_autorouted_apis(apis_to_serve: List[str] | Set[str]) -> Dict[str, Dict[str, ProviderWithSpec]]: +def specs_for_autorouted_apis(apis_to_serve: list[str] | set[str]) -> dict[str, dict[str, ProviderWithSpec]]: """Generates specifications for automatically routed APIs.""" specs = {} for info in builtin_automatically_routed_apis(): @@ -179,10 +185,10 @@ def specs_for_autorouted_apis(apis_to_serve: List[str] | Set[str]) -> Dict[str, def validate_and_prepare_providers( - run_config: StackRunConfig, provider_registry: ProviderRegistry, routing_table_apis: Set[Api], router_apis: Set[Api] -) -> Dict[str, Dict[str, ProviderWithSpec]]: + run_config: StackRunConfig, provider_registry: ProviderRegistry, routing_table_apis: set[Api], router_apis: set[Api] +) -> dict[str, dict[str, ProviderWithSpec]]: """Validates providers, handles deprecations, and organizes them into a spec dictionary.""" - providers_with_specs: Dict[str, Dict[str, ProviderWithSpec]] = {} + providers_with_specs: dict[str, dict[str, ProviderWithSpec]] = {} for api_str, providers in run_config.providers.items(): api = Api(api_str) @@ -223,53 +229,13 @@ def validate_provider(provider: Provider, api: Api, provider_registry: ProviderR def sort_providers_by_deps( - providers_with_specs: Dict[str, Dict[str, ProviderWithSpec]], run_config: StackRunConfig -) -> List[Tuple[str, ProviderWithSpec]]: + providers_with_specs: dict[str, dict[str, ProviderWithSpec]], run_config: StackRunConfig +) -> list[tuple[str, ProviderWithSpec]]: """Sorts providers based on their dependencies.""" - sorted_providers: List[Tuple[str, ProviderWithSpec]] = topological_sort( + sorted_providers: list[tuple[str, ProviderWithSpec]] = topological_sort( {k: list(v.values()) for k, v in providers_with_specs.items()} ) - # Append built-in "inspect" provider - apis = [x[1].spec.api for x in sorted_providers] - sorted_providers.append( - ( - "inspect", - ProviderWithSpec( - provider_id="__builtin__", - provider_type="__builtin__", - config={"run_config": run_config.model_dump()}, - spec=InlineProviderSpec( - api=Api.inspect, - provider_type="__builtin__", - config_class="llama_stack.distribution.inspect.DistributionInspectConfig", - module="llama_stack.distribution.inspect", - api_dependencies=apis, - deps__=[x.value for x in apis], - ), - ), - ) - ) - - sorted_providers.append( - ( - "providers", - ProviderWithSpec( - provider_id="__builtin__", - provider_type="__builtin__", - config={"run_config": run_config.model_dump()}, - spec=InlineProviderSpec( - api=Api.providers, - provider_type="__builtin__", - config_class="llama_stack.distribution.providers.ProviderImplConfig", - module="llama_stack.distribution.providers", - api_dependencies=apis, - deps__=[x.value for x in apis], - ), - ), - ) - ) - logger.debug(f"Resolved {len(sorted_providers)} providers") for api_str, provider in sorted_providers: logger.debug(f" {api_str} => {provider.provider_id}") @@ -277,11 +243,14 @@ def sort_providers_by_deps( async def instantiate_providers( - sorted_providers: List[Tuple[str, ProviderWithSpec]], router_apis: Set[Api], dist_registry: DistributionRegistry -) -> Dict: + sorted_providers: list[tuple[str, ProviderWithSpec]], + router_apis: set[Api], + dist_registry: 
DistributionRegistry, + run_config: StackRunConfig, +) -> dict: """Instantiates providers asynchronously while managing dependencies.""" - impls: Dict[Api, Any] = {} - inner_impls_by_provider_id: Dict[str, Dict[str, Any]] = {f"inner-{x.value}": {} for x in router_apis} + impls: dict[Api, Any] = {} + inner_impls_by_provider_id: dict[str, dict[str, Any]] = {f"inner-{x.value}": {} for x in router_apis} for api_str, provider in sorted_providers: deps = {a: impls[a] for a in provider.spec.api_dependencies} for a in provider.spec.optional_api_dependencies: @@ -292,7 +261,7 @@ async def instantiate_providers( if isinstance(provider.spec, RoutingTableProviderSpec): inner_impls = inner_impls_by_provider_id[f"inner-{provider.spec.router_api.value}"] - impl = await instantiate_provider(provider, deps, inner_impls, dist_registry) + impl = await instantiate_provider(provider, deps, inner_impls, dist_registry, run_config) if api_str.startswith("inner-"): inner_impls_by_provider_id[api_str][provider.provider_id] = impl @@ -304,9 +273,9 @@ async def instantiate_providers( def topological_sort( - providers_with_specs: Dict[str, List[ProviderWithSpec]], -) -> List[Tuple[str, ProviderWithSpec]]: - def dfs(kv, visited: Set[str], stack: List[str]): + providers_with_specs: dict[str, list[ProviderWithSpec]], +) -> list[tuple[str, ProviderWithSpec]]: + def dfs(kv, visited: set[str], stack: list[str]): api_str, providers = kv visited.add(api_str) @@ -321,8 +290,8 @@ def topological_sort( stack.append(api_str) - visited: Set[str] = set() - stack: List[str] = [] + visited: set[str] = set() + stack: list[str] = [] for api_str, providers in providers_with_specs.items(): if api_str not in visited: @@ -339,13 +308,11 @@ def topological_sort( # returns a class implementing the protocol corresponding to the Api async def instantiate_provider( provider: ProviderWithSpec, - deps: Dict[Api, Any], - inner_impls: Dict[str, Any], + deps: dict[Api, Any], + inner_impls: dict[str, Any], dist_registry: DistributionRegistry, + run_config: StackRunConfig, ): - protocols = api_protocol_map() - additional_protocols = additional_protocols_map() - provider_spec = provider.spec if not hasattr(provider_spec, "module"): raise AttributeError(f"ProviderSpec of type {type(provider_spec)} does not have a 'module' attribute") @@ -364,7 +331,7 @@ async def instantiate_provider( method = "get_auto_router_impl" config = None - args = [provider_spec.api, deps[provider_spec.routing_table_api], deps] + args = [provider_spec.api, deps[provider_spec.routing_table_api], deps, run_config] elif isinstance(provider_spec, RoutingTableProviderSpec): method = "get_routing_table_impl" @@ -383,6 +350,8 @@ async def instantiate_provider( impl.__provider_spec__ = provider_spec impl.__provider_config__ = config + protocols = api_protocol_map_for_compliance_check() + additional_protocols = additional_protocols_map() # TODO: check compliance for special tool groups # the impl should be for Api.tool_runtime, the name should be the special tool group, the protocol should be the special tool group protocol check_protocol_compliance(impl, protocols[provider_spec.api]) @@ -432,8 +401,8 @@ def check_protocol_compliance(obj: Any, protocol: Any) -> None: async def resolve_remote_stack_impls( config: RemoteProviderConfig, - apis: List[str], -) -> Dict[Api, Any]: + apis: list[str], +) -> dict[Api, Any]: protocols = api_protocol_map() additional_protocols = additional_protocols_map() diff --git a/llama_stack/distribution/routers/__init__.py 
b/llama_stack/distribution/routers/__init__.py index d0fca8771..1358d5812 100644 --- a/llama_stack/distribution/routers/__init__.py +++ b/llama_stack/distribution/routers/__init__.py @@ -4,29 +4,29 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict +from typing import Any from llama_stack.distribution.datatypes import RoutedProtocol +from llama_stack.distribution.stack import StackRunConfig from llama_stack.distribution.store import DistributionRegistry from llama_stack.providers.datatypes import Api, RoutingTable - -from .routing_tables import ( - BenchmarksRoutingTable, - DatasetsRoutingTable, - ModelsRoutingTable, - ScoringFunctionsRoutingTable, - ShieldsRoutingTable, - ToolGroupsRoutingTable, - VectorDBsRoutingTable, -) +from llama_stack.providers.utils.inference.inference_store import InferenceStore async def get_routing_table_impl( api: Api, - impls_by_provider_id: Dict[str, RoutedProtocol], + impls_by_provider_id: dict[str, RoutedProtocol], _deps, dist_registry: DistributionRegistry, ) -> Any: + from ..routing_tables.benchmarks import BenchmarksRoutingTable + from ..routing_tables.datasets import DatasetsRoutingTable + from ..routing_tables.models import ModelsRoutingTable + from ..routing_tables.scoring_functions import ScoringFunctionsRoutingTable + from ..routing_tables.shields import ShieldsRoutingTable + from ..routing_tables.toolgroups import ToolGroupsRoutingTable + from ..routing_tables.vector_dbs import VectorDBsRoutingTable + api_to_tables = { "vector_dbs": VectorDBsRoutingTable, "models": ModelsRoutingTable, @@ -45,16 +45,15 @@ async def get_routing_table_impl( return impl -async def get_auto_router_impl(api: Api, routing_table: RoutingTable, deps: Dict[str, Any]) -> Any: - from .routers import ( - DatasetIORouter, - EvalRouter, - InferenceRouter, - SafetyRouter, - ScoringRouter, - ToolRuntimeRouter, - VectorIORouter, - ) +async def get_auto_router_impl( + api: Api, routing_table: RoutingTable, deps: dict[str, Any], run_config: StackRunConfig +) -> Any: + from .datasets import DatasetIORouter + from .eval_scoring import EvalRouter, ScoringRouter + from .inference import InferenceRouter + from .safety import SafetyRouter + from .tool_runtime import ToolRuntimeRouter + from .vector_io import VectorIORouter api_to_routers = { "vector_io": VectorIORouter, @@ -76,6 +75,12 @@ async def get_auto_router_impl(api: Api, routing_table: RoutingTable, deps: Dict if dep_api in deps: api_to_dep_impl[dep_name] = deps[dep_api] + # TODO: move pass configs to routers instead + if api == Api.inference and run_config.inference_store: + inference_store = InferenceStore(run_config.inference_store) + await inference_store.initialize() + api_to_dep_impl["store"] = inference_store + impl = api_to_routers[api.value](routing_table, **api_to_dep_impl) await impl.initialize() return impl diff --git a/llama_stack/distribution/routers/datasets.py b/llama_stack/distribution/routers/datasets.py new file mode 100644 index 000000000..6f28756c9 --- /dev/null +++ b/llama_stack/distribution/routers/datasets.py @@ -0,0 +1,71 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from typing import Any + +from llama_stack.apis.common.responses import PaginatedResponse +from llama_stack.apis.datasetio import DatasetIO +from llama_stack.apis.datasets import DatasetPurpose, DataSource +from llama_stack.log import get_logger +from llama_stack.providers.datatypes import RoutingTable + +logger = get_logger(name=__name__, category="core") + + +class DatasetIORouter(DatasetIO): + def __init__( + self, + routing_table: RoutingTable, + ) -> None: + logger.debug("Initializing DatasetIORouter") + self.routing_table = routing_table + + async def initialize(self) -> None: + logger.debug("DatasetIORouter.initialize") + pass + + async def shutdown(self) -> None: + logger.debug("DatasetIORouter.shutdown") + pass + + async def register_dataset( + self, + purpose: DatasetPurpose, + source: DataSource, + metadata: dict[str, Any] | None = None, + dataset_id: str | None = None, + ) -> None: + logger.debug( + f"DatasetIORouter.register_dataset: {purpose=} {source=} {metadata=} {dataset_id=}", + ) + await self.routing_table.register_dataset( + purpose=purpose, + source=source, + metadata=metadata, + dataset_id=dataset_id, + ) + + async def iterrows( + self, + dataset_id: str, + start_index: int | None = None, + limit: int | None = None, + ) -> PaginatedResponse: + logger.debug( + f"DatasetIORouter.iterrows: {dataset_id}, {start_index=} {limit=}", + ) + return await self.routing_table.get_provider_impl(dataset_id).iterrows( + dataset_id=dataset_id, + start_index=start_index, + limit=limit, + ) + + async def append_rows(self, dataset_id: str, rows: list[dict[str, Any]]) -> None: + logger.debug(f"DatasetIORouter.append_rows: {dataset_id}, {len(rows)} rows") + return await self.routing_table.get_provider_impl(dataset_id).append_rows( + dataset_id=dataset_id, + rows=rows, + ) diff --git a/llama_stack/distribution/routers/eval_scoring.py b/llama_stack/distribution/routers/eval_scoring.py new file mode 100644 index 000000000..fd0bb90a7 --- /dev/null +++ b/llama_stack/distribution/routers/eval_scoring.py @@ -0,0 +1,148 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from typing import Any + +from llama_stack.apis.eval import BenchmarkConfig, Eval, EvaluateResponse, Job +from llama_stack.apis.scoring import ( + ScoreBatchResponse, + ScoreResponse, + Scoring, + ScoringFnParams, +) +from llama_stack.log import get_logger +from llama_stack.providers.datatypes import RoutingTable + +logger = get_logger(name=__name__, category="core") + + +class ScoringRouter(Scoring): + def __init__( + self, + routing_table: RoutingTable, + ) -> None: + logger.debug("Initializing ScoringRouter") + self.routing_table = routing_table + + async def initialize(self) -> None: + logger.debug("ScoringRouter.initialize") + pass + + async def shutdown(self) -> None: + logger.debug("ScoringRouter.shutdown") + pass + + async def score_batch( + self, + dataset_id: str, + scoring_functions: dict[str, ScoringFnParams | None] = None, + save_results_dataset: bool = False, + ) -> ScoreBatchResponse: + logger.debug(f"ScoringRouter.score_batch: {dataset_id}") + res = {} + for fn_identifier in scoring_functions.keys(): + score_response = await self.routing_table.get_provider_impl(fn_identifier).score_batch( + dataset_id=dataset_id, + scoring_functions={fn_identifier: scoring_functions[fn_identifier]}, + ) + res.update(score_response.results) + + if save_results_dataset: + raise NotImplementedError("Save results dataset not implemented yet") + + return ScoreBatchResponse( + results=res, + ) + + async def score( + self, + input_rows: list[dict[str, Any]], + scoring_functions: dict[str, ScoringFnParams | None] = None, + ) -> ScoreResponse: + logger.debug(f"ScoringRouter.score: {len(input_rows)} rows, {len(scoring_functions)} functions") + res = {} + # look up and map each scoring function to its provider impl + for fn_identifier in scoring_functions.keys(): + score_response = await self.routing_table.get_provider_impl(fn_identifier).score( + input_rows=input_rows, + scoring_functions={fn_identifier: scoring_functions[fn_identifier]}, + ) + res.update(score_response.results) + + return ScoreResponse(results=res) + + +class EvalRouter(Eval): + def __init__( + self, + routing_table: RoutingTable, + ) -> None: + logger.debug("Initializing EvalRouter") + self.routing_table = routing_table + + async def initialize(self) -> None: + logger.debug("EvalRouter.initialize") + pass + + async def shutdown(self) -> None: + logger.debug("EvalRouter.shutdown") + pass + + async def run_eval( + self, + benchmark_id: str, + benchmark_config: BenchmarkConfig, + ) -> Job: + logger.debug(f"EvalRouter.run_eval: {benchmark_id}") + return await self.routing_table.get_provider_impl(benchmark_id).run_eval( + benchmark_id=benchmark_id, + benchmark_config=benchmark_config, + ) + + async def evaluate_rows( + self, + benchmark_id: str, + input_rows: list[dict[str, Any]], + scoring_functions: list[str], + benchmark_config: BenchmarkConfig, + ) -> EvaluateResponse: + logger.debug(f"EvalRouter.evaluate_rows: {benchmark_id}, {len(input_rows)} rows") + return await self.routing_table.get_provider_impl(benchmark_id).evaluate_rows( + benchmark_id=benchmark_id, + input_rows=input_rows, + scoring_functions=scoring_functions, + benchmark_config=benchmark_config, + ) + + async def job_status( + self, + benchmark_id: str, + job_id: str, + ) -> Job: + logger.debug(f"EvalRouter.job_status: {benchmark_id}, {job_id}") + return await self.routing_table.get_provider_impl(benchmark_id).job_status(benchmark_id, job_id) + + async def job_cancel( + self, + benchmark_id: str, + job_id: str, + ) -> None: + logger.debug(f"EvalRouter.job_cancel: 
{benchmark_id}, {job_id}") + await self.routing_table.get_provider_impl(benchmark_id).job_cancel( + benchmark_id, + job_id, + ) + + async def job_result( + self, + benchmark_id: str, + job_id: str, + ) -> EvaluateResponse: + logger.debug(f"EvalRouter.job_result: {benchmark_id}, {job_id}") + return await self.routing_table.get_provider_impl(benchmark_id).job_result( + benchmark_id, + job_id, + ) diff --git a/llama_stack/distribution/routers/inference.py b/llama_stack/distribution/routers/inference.py new file mode 100644 index 000000000..763bd9105 --- /dev/null +++ b/llama_stack/distribution/routers/inference.py @@ -0,0 +1,624 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import asyncio +import time +from collections.abc import AsyncGenerator, AsyncIterator +from typing import Annotated, Any + +from openai.types.chat import ChatCompletionToolChoiceOptionParam as OpenAIChatCompletionToolChoiceOptionParam +from openai.types.chat import ChatCompletionToolParam as OpenAIChatCompletionToolParam +from pydantic import Field, TypeAdapter + +from llama_stack.apis.common.content_types import ( + InterleavedContent, + InterleavedContentItem, +) +from llama_stack.apis.inference import ( + BatchChatCompletionResponse, + BatchCompletionResponse, + ChatCompletionResponse, + ChatCompletionResponseEventType, + ChatCompletionResponseStreamChunk, + CompletionMessage, + EmbeddingsResponse, + EmbeddingTaskType, + Inference, + ListOpenAIChatCompletionResponse, + LogProbConfig, + Message, + OpenAICompletionWithInputMessages, + Order, + ResponseFormat, + SamplingParams, + StopReason, + TextTruncation, + ToolChoice, + ToolConfig, + ToolDefinition, + ToolPromptFormat, +) +from llama_stack.apis.inference.inference import ( + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, + OpenAIEmbeddingsResponse, + OpenAIMessageParam, + OpenAIResponseFormatParam, +) +from llama_stack.apis.models import Model, ModelType +from llama_stack.apis.telemetry import MetricEvent, MetricInResponse, Telemetry +from llama_stack.log import get_logger +from llama_stack.models.llama.llama3.chat_format import ChatFormat +from llama_stack.models.llama.llama3.tokenizer import Tokenizer +from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable +from llama_stack.providers.utils.inference.inference_store import InferenceStore +from llama_stack.providers.utils.inference.stream_utils import stream_and_store_openai_completion +from llama_stack.providers.utils.telemetry.tracing import get_current_span + +logger = get_logger(name=__name__, category="core") + + +class InferenceRouter(Inference): + """Routes to an provider based on the model""" + + def __init__( + self, + routing_table: RoutingTable, + telemetry: Telemetry | None = None, + store: InferenceStore | None = None, + ) -> None: + logger.debug("Initializing InferenceRouter") + self.routing_table = routing_table + self.telemetry = telemetry + self.store = store + if self.telemetry: + self.tokenizer = Tokenizer.get_instance() + self.formatter = ChatFormat(self.tokenizer) + + async def initialize(self) -> None: + logger.debug("InferenceRouter.initialize") + pass + + async def shutdown(self) -> None: + logger.debug("InferenceRouter.shutdown") + pass + + async def register_model( + self, + model_id: str, + provider_model_id: str | None = None, + provider_id: str | None = None, + metadata: 
dict[str, Any] | None = None, + model_type: ModelType | None = None, + ) -> None: + logger.debug( + f"InferenceRouter.register_model: {model_id=} {provider_model_id=} {provider_id=} {metadata=} {model_type=}", + ) + await self.routing_table.register_model(model_id, provider_model_id, provider_id, metadata, model_type) + + def _construct_metrics( + self, + prompt_tokens: int, + completion_tokens: int, + total_tokens: int, + model: Model, + ) -> list[MetricEvent]: + """Constructs a list of MetricEvent objects containing token usage metrics. + + Args: + prompt_tokens: Number of tokens in the prompt + completion_tokens: Number of tokens in the completion + total_tokens: Total number of tokens used + model: Model object containing model_id and provider_id + + Returns: + List of MetricEvent objects with token usage metrics + """ + span = get_current_span() + if span is None: + logger.warning("No span found for token usage metrics") + return [] + metrics = [ + ("prompt_tokens", prompt_tokens), + ("completion_tokens", completion_tokens), + ("total_tokens", total_tokens), + ] + metric_events = [] + for metric_name, value in metrics: + metric_events.append( + MetricEvent( + trace_id=span.trace_id, + span_id=span.span_id, + metric=metric_name, + value=value, + timestamp=time.time(), + unit="tokens", + attributes={ + "model_id": model.model_id, + "provider_id": model.provider_id, + }, + ) + ) + return metric_events + + async def _compute_and_log_token_usage( + self, + prompt_tokens: int, + completion_tokens: int, + total_tokens: int, + model: Model, + ) -> list[MetricInResponse]: + metrics = self._construct_metrics(prompt_tokens, completion_tokens, total_tokens, model) + if self.telemetry: + for metric in metrics: + await self.telemetry.log_event(metric) + return [MetricInResponse(metric=metric.metric, value=metric.value) for metric in metrics] + + async def _count_tokens( + self, + messages: list[Message] | InterleavedContent, + tool_prompt_format: ToolPromptFormat | None = None, + ) -> int | None: + if isinstance(messages, list): + encoded = self.formatter.encode_dialog_prompt(messages, tool_prompt_format) + else: + encoded = self.formatter.encode_content(messages) + return len(encoded.tokens) if encoded and encoded.tokens else 0 + + async def chat_completion( + self, + model_id: str, + messages: list[Message], + sampling_params: SamplingParams | None = None, + response_format: ResponseFormat | None = None, + tools: list[ToolDefinition] | None = None, + tool_choice: ToolChoice | None = None, + tool_prompt_format: ToolPromptFormat | None = None, + stream: bool | None = False, + logprobs: LogProbConfig | None = None, + tool_config: ToolConfig | None = None, + ) -> ChatCompletionResponse | AsyncIterator[ChatCompletionResponseStreamChunk]: + logger.debug( + f"InferenceRouter.chat_completion: {model_id=}, {stream=}, {messages=}, {tools=}, {tool_config=}, {response_format=}", + ) + if sampling_params is None: + sampling_params = SamplingParams() + model = await self.routing_table.get_model(model_id) + if model is None: + raise ValueError(f"Model '{model_id}' not found") + if model.model_type == ModelType.embedding: + raise ValueError(f"Model '{model_id}' is an embedding model and does not support chat completions") + if tool_config: + if tool_choice and tool_choice != tool_config.tool_choice: + raise ValueError("tool_choice and tool_config.tool_choice must match") + if tool_prompt_format and tool_prompt_format != tool_config.tool_prompt_format: + raise ValueError("tool_prompt_format and 
tool_config.tool_prompt_format must match") + else: + params = {} + if tool_choice: + params["tool_choice"] = tool_choice + if tool_prompt_format: + params["tool_prompt_format"] = tool_prompt_format + tool_config = ToolConfig(**params) + + tools = tools or [] + if tool_config.tool_choice == ToolChoice.none: + tools = [] + elif tool_config.tool_choice == ToolChoice.auto: + pass + elif tool_config.tool_choice == ToolChoice.required: + pass + else: + # verify tool_choice is one of the tools + tool_names = [t.tool_name if isinstance(t.tool_name, str) else t.tool_name.value for t in tools] + if tool_config.tool_choice not in tool_names: + raise ValueError(f"Tool choice {tool_config.tool_choice} is not one of the tools: {tool_names}") + + params = dict( + model_id=model_id, + messages=messages, + sampling_params=sampling_params, + tools=tools, + tool_choice=tool_choice, + tool_prompt_format=tool_prompt_format, + response_format=response_format, + stream=stream, + logprobs=logprobs, + tool_config=tool_config, + ) + provider = self.routing_table.get_provider_impl(model_id) + prompt_tokens = await self._count_tokens(messages, tool_config.tool_prompt_format) + + if stream: + + async def stream_generator(): + completion_text = "" + async for chunk in await provider.chat_completion(**params): + if chunk.event.event_type == ChatCompletionResponseEventType.progress: + if chunk.event.delta.type == "text": + completion_text += chunk.event.delta.text + if chunk.event.event_type == ChatCompletionResponseEventType.complete: + completion_tokens = await self._count_tokens( + [ + CompletionMessage( + content=completion_text, + stop_reason=StopReason.end_of_turn, + ) + ], + tool_config.tool_prompt_format, + ) + total_tokens = (prompt_tokens or 0) + (completion_tokens or 0) + metrics = await self._compute_and_log_token_usage( + prompt_tokens or 0, + completion_tokens or 0, + total_tokens, + model, + ) + chunk.metrics = metrics if chunk.metrics is None else chunk.metrics + metrics + yield chunk + + return stream_generator() + else: + response = await provider.chat_completion(**params) + completion_tokens = await self._count_tokens( + [response.completion_message], + tool_config.tool_prompt_format, + ) + total_tokens = (prompt_tokens or 0) + (completion_tokens or 0) + metrics = await self._compute_and_log_token_usage( + prompt_tokens or 0, + completion_tokens or 0, + total_tokens, + model, + ) + response.metrics = metrics if response.metrics is None else response.metrics + metrics + return response + + async def batch_chat_completion( + self, + model_id: str, + messages_batch: list[list[Message]], + tools: list[ToolDefinition] | None = None, + tool_config: ToolConfig | None = None, + sampling_params: SamplingParams | None = None, + response_format: ResponseFormat | None = None, + logprobs: LogProbConfig | None = None, + ) -> BatchChatCompletionResponse: + logger.debug( + f"InferenceRouter.batch_chat_completion: {model_id=}, {len(messages_batch)=}, {sampling_params=}, {response_format=}, {logprobs=}", + ) + provider = self.routing_table.get_provider_impl(model_id) + return await provider.batch_chat_completion( + model_id=model_id, + messages_batch=messages_batch, + tools=tools, + tool_config=tool_config, + sampling_params=sampling_params, + response_format=response_format, + logprobs=logprobs, + ) + + async def completion( + self, + model_id: str, + content: InterleavedContent, + sampling_params: SamplingParams | None = None, + response_format: ResponseFormat | None = None, + stream: bool | None = False, + logprobs: 
LogProbConfig | None = None, + ) -> AsyncGenerator: + if sampling_params is None: + sampling_params = SamplingParams() + logger.debug( + f"InferenceRouter.completion: {model_id=}, {stream=}, {content=}, {sampling_params=}, {response_format=}", + ) + model = await self.routing_table.get_model(model_id) + if model is None: + raise ValueError(f"Model '{model_id}' not found") + if model.model_type == ModelType.embedding: + raise ValueError(f"Model '{model_id}' is an embedding model and does not support chat completions") + provider = self.routing_table.get_provider_impl(model_id) + params = dict( + model_id=model_id, + content=content, + sampling_params=sampling_params, + response_format=response_format, + stream=stream, + logprobs=logprobs, + ) + + prompt_tokens = await self._count_tokens(content) + + if stream: + + async def stream_generator(): + completion_text = "" + async for chunk in await provider.completion(**params): + if hasattr(chunk, "delta"): + completion_text += chunk.delta + if hasattr(chunk, "stop_reason") and chunk.stop_reason and self.telemetry: + completion_tokens = await self._count_tokens(completion_text) + total_tokens = (prompt_tokens or 0) + (completion_tokens or 0) + metrics = await self._compute_and_log_token_usage( + prompt_tokens or 0, + completion_tokens or 0, + total_tokens, + model, + ) + chunk.metrics = metrics if chunk.metrics is None else chunk.metrics + metrics + yield chunk + + return stream_generator() + else: + response = await provider.completion(**params) + completion_tokens = await self._count_tokens(response.content) + total_tokens = (prompt_tokens or 0) + (completion_tokens or 0) + metrics = await self._compute_and_log_token_usage( + prompt_tokens or 0, + completion_tokens or 0, + total_tokens, + model, + ) + response.metrics = metrics if response.metrics is None else response.metrics + metrics + return response + + async def batch_completion( + self, + model_id: str, + content_batch: list[InterleavedContent], + sampling_params: SamplingParams | None = None, + response_format: ResponseFormat | None = None, + logprobs: LogProbConfig | None = None, + ) -> BatchCompletionResponse: + logger.debug( + f"InferenceRouter.batch_completion: {model_id=}, {len(content_batch)=}, {sampling_params=}, {response_format=}, {logprobs=}", + ) + provider = self.routing_table.get_provider_impl(model_id) + return await provider.batch_completion(model_id, content_batch, sampling_params, response_format, logprobs) + + async def embeddings( + self, + model_id: str, + contents: list[str] | list[InterleavedContentItem], + text_truncation: TextTruncation | None = TextTruncation.none, + output_dimension: int | None = None, + task_type: EmbeddingTaskType | None = None, + ) -> EmbeddingsResponse: + logger.debug(f"InferenceRouter.embeddings: {model_id}") + model = await self.routing_table.get_model(model_id) + if model is None: + raise ValueError(f"Model '{model_id}' not found") + if model.model_type == ModelType.llm: + raise ValueError(f"Model '{model_id}' is an LLM model and does not support embeddings") + return await self.routing_table.get_provider_impl(model_id).embeddings( + model_id=model_id, + contents=contents, + text_truncation=text_truncation, + output_dimension=output_dimension, + task_type=task_type, + ) + + async def openai_completion( + self, + model: str, + prompt: str | list[str] | list[int] | list[list[int]], + best_of: int | None = None, + echo: bool | None = None, + frequency_penalty: float | None = None, + logit_bias: dict[str, float] | None = None, + logprobs: 
bool | None = None, + max_tokens: int | None = None, + n: int | None = None, + presence_penalty: float | None = None, + seed: int | None = None, + stop: str | list[str] | None = None, + stream: bool | None = None, + stream_options: dict[str, Any] | None = None, + temperature: float | None = None, + top_p: float | None = None, + user: str | None = None, + guided_choice: list[str] | None = None, + prompt_logprobs: int | None = None, + ) -> OpenAICompletion: + logger.debug( + f"InferenceRouter.openai_completion: {model=}, {stream=}, {prompt=}", + ) + model_obj = await self.routing_table.get_model(model) + if model_obj is None: + raise ValueError(f"Model '{model}' not found") + if model_obj.model_type == ModelType.embedding: + raise ValueError(f"Model '{model}' is an embedding model and does not support completions") + + params = dict( + model=model_obj.identifier, + prompt=prompt, + best_of=best_of, + echo=echo, + frequency_penalty=frequency_penalty, + logit_bias=logit_bias, + logprobs=logprobs, + max_tokens=max_tokens, + n=n, + presence_penalty=presence_penalty, + seed=seed, + stop=stop, + stream=stream, + stream_options=stream_options, + temperature=temperature, + top_p=top_p, + user=user, + guided_choice=guided_choice, + prompt_logprobs=prompt_logprobs, + ) + + provider = self.routing_table.get_provider_impl(model_obj.identifier) + return await provider.openai_completion(**params) + + async def openai_chat_completion( + self, + model: str, + messages: Annotated[list[OpenAIMessageParam], Field(..., min_length=1)], + frequency_penalty: float | None = None, + function_call: str | dict[str, Any] | None = None, + functions: list[dict[str, Any]] | None = None, + logit_bias: dict[str, float] | None = None, + logprobs: bool | None = None, + max_completion_tokens: int | None = None, + max_tokens: int | None = None, + n: int | None = None, + parallel_tool_calls: bool | None = None, + presence_penalty: float | None = None, + response_format: OpenAIResponseFormatParam | None = None, + seed: int | None = None, + stop: str | list[str] | None = None, + stream: bool | None = None, + stream_options: dict[str, Any] | None = None, + temperature: float | None = None, + tool_choice: str | dict[str, Any] | None = None, + tools: list[dict[str, Any]] | None = None, + top_logprobs: int | None = None, + top_p: float | None = None, + user: str | None = None, + ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]: + logger.debug( + f"InferenceRouter.openai_chat_completion: {model=}, {stream=}, {messages=}", + ) + model_obj = await self.routing_table.get_model(model) + if model_obj is None: + raise ValueError(f"Model '{model}' not found") + if model_obj.model_type == ModelType.embedding: + raise ValueError(f"Model '{model}' is an embedding model and does not support chat completions") + + # Use the OpenAI client for a bit of extra input validation without + # exposing the OpenAI client itself as part of our API surface + if tool_choice: + TypeAdapter(OpenAIChatCompletionToolChoiceOptionParam).validate_python(tool_choice) + if tools is None: + raise ValueError("'tool_choice' is only allowed when 'tools' is also provided") + if tools: + for tool in tools: + TypeAdapter(OpenAIChatCompletionToolParam).validate_python(tool) + + # Some providers make tool calls even when tool_choice is "none" + # so just clear them both out to avoid unexpected tool calls + if tool_choice == "none" and tools is not None: + tool_choice = None + tools = None + + params = dict( + model=model_obj.identifier, + messages=messages, 
+ frequency_penalty=frequency_penalty, + function_call=function_call, + functions=functions, + logit_bias=logit_bias, + logprobs=logprobs, + max_completion_tokens=max_completion_tokens, + max_tokens=max_tokens, + n=n, + parallel_tool_calls=parallel_tool_calls, + presence_penalty=presence_penalty, + response_format=response_format, + seed=seed, + stop=stop, + stream=stream, + stream_options=stream_options, + temperature=temperature, + tool_choice=tool_choice, + tools=tools, + top_logprobs=top_logprobs, + top_p=top_p, + user=user, + ) + + provider = self.routing_table.get_provider_impl(model_obj.identifier) + if stream: + response_stream = await provider.openai_chat_completion(**params) + if self.store: + return stream_and_store_openai_completion(response_stream, model, self.store, messages) + return response_stream + else: + response = await self._nonstream_openai_chat_completion(provider, params) + if self.store: + await self.store.store_chat_completion(response, messages) + return response + + async def openai_embeddings( + self, + model: str, + input: str | list[str], + encoding_format: str | None = "float", + dimensions: int | None = None, + user: str | None = None, + ) -> OpenAIEmbeddingsResponse: + logger.debug( + f"InferenceRouter.openai_embeddings: {model=}, input_type={type(input)}, {encoding_format=}, {dimensions=}", + ) + model_obj = await self.routing_table.get_model(model) + if model_obj is None: + raise ValueError(f"Model '{model}' not found") + if model_obj.model_type != ModelType.embedding: + raise ValueError(f"Model '{model}' is not an embedding model") + + params = dict( + model=model_obj.identifier, + input=input, + encoding_format=encoding_format, + dimensions=dimensions, + user=user, + ) + + provider = self.routing_table.get_provider_impl(model_obj.identifier) + return await provider.openai_embeddings(**params) + + async def list_chat_completions( + self, + after: str | None = None, + limit: int | None = 20, + model: str | None = None, + order: Order | None = Order.desc, + ) -> ListOpenAIChatCompletionResponse: + if self.store: + return await self.store.list_chat_completions(after, limit, model, order) + raise NotImplementedError("List chat completions is not supported: inference store is not configured.") + + async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages: + if self.store: + return await self.store.get_chat_completion(completion_id) + raise NotImplementedError("Get chat completion is not supported: inference store is not configured.") + + async def _nonstream_openai_chat_completion(self, provider: Inference, params: dict) -> OpenAIChatCompletion: + response = await provider.openai_chat_completion(**params) + for choice in response.choices: + # some providers return an empty list for no tool calls in non-streaming responses + # but the OpenAI API returns None. 
So, set tool_calls to None if it's empty + if choice.message and choice.message.tool_calls is not None and len(choice.message.tool_calls) == 0: + choice.message.tool_calls = None + return response + + async def health(self) -> dict[str, HealthResponse]: + health_statuses = {} + timeout = 0.5 + for provider_id, impl in self.routing_table.impls_by_provider_id.items(): + try: + # check if the provider has a health method + if not hasattr(impl, "health"): + continue + health = await asyncio.wait_for(impl.health(), timeout=timeout) + health_statuses[provider_id] = health + except (asyncio.TimeoutError, TimeoutError): + health_statuses[provider_id] = HealthResponse( + status=HealthStatus.ERROR, + message=f"Health check timed out after {timeout} seconds", + ) + except NotImplementedError: + health_statuses[provider_id] = HealthResponse(status=HealthStatus.NOT_IMPLEMENTED) + except Exception as e: + health_statuses[provider_id] = HealthResponse( + status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}" + ) + return health_statuses diff --git a/llama_stack/distribution/routers/routers.py b/llama_stack/distribution/routers/routers.py deleted file mode 100644 index b9623ef3c..000000000 --- a/llama_stack/distribution/routers/routers.py +++ /dev/null @@ -1,873 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import time -from typing import Any, AsyncGenerator, AsyncIterator, Dict, List, Optional, Union - -from llama_stack.apis.common.content_types import ( - URL, - InterleavedContent, - InterleavedContentItem, -) -from llama_stack.apis.common.responses import PaginatedResponse -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import DatasetPurpose, DataSource -from llama_stack.apis.eval import BenchmarkConfig, Eval, EvaluateResponse, Job -from llama_stack.apis.inference import ( - BatchChatCompletionResponse, - BatchCompletionResponse, - ChatCompletionResponse, - ChatCompletionResponseEventType, - ChatCompletionResponseStreamChunk, - CompletionMessage, - EmbeddingsResponse, - EmbeddingTaskType, - Inference, - LogProbConfig, - Message, - ResponseFormat, - SamplingParams, - StopReason, - TextTruncation, - ToolChoice, - ToolConfig, - ToolDefinition, - ToolPromptFormat, -) -from llama_stack.apis.inference.inference import OpenAIChatCompletion, OpenAICompletion, OpenAIMessageParam -from llama_stack.apis.models import Model, ModelType -from llama_stack.apis.safety import RunShieldResponse, Safety -from llama_stack.apis.scoring import ( - ScoreBatchResponse, - ScoreResponse, - Scoring, - ScoringFnParams, -) -from llama_stack.apis.shields import Shield -from llama_stack.apis.telemetry import MetricEvent, MetricInResponse, Telemetry -from llama_stack.apis.tools import ( - ListToolDefsResponse, - RAGDocument, - RAGQueryConfig, - RAGQueryResult, - RAGToolRuntime, - ToolRuntime, -) -from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO -from llama_stack.log import get_logger -from llama_stack.models.llama.llama3.chat_format import ChatFormat -from llama_stack.models.llama.llama3.tokenizer import Tokenizer -from llama_stack.providers.datatypes import RoutingTable -from llama_stack.providers.utils.telemetry.tracing import get_current_span - -logger = get_logger(name=__name__, category="core") - - -class VectorIORouter(VectorIO): - """Routes to an provider based on the vector db 
identifier""" - - def __init__( - self, - routing_table: RoutingTable, - ) -> None: - logger.debug("Initializing VectorIORouter") - self.routing_table = routing_table - - async def initialize(self) -> None: - logger.debug("VectorIORouter.initialize") - pass - - async def shutdown(self) -> None: - logger.debug("VectorIORouter.shutdown") - pass - - async def register_vector_db( - self, - vector_db_id: str, - embedding_model: str, - embedding_dimension: Optional[int] = 384, - provider_id: Optional[str] = None, - provider_vector_db_id: Optional[str] = None, - ) -> None: - logger.debug(f"VectorIORouter.register_vector_db: {vector_db_id}, {embedding_model}") - await self.routing_table.register_vector_db( - vector_db_id, - embedding_model, - embedding_dimension, - provider_id, - provider_vector_db_id, - ) - - async def insert_chunks( - self, - vector_db_id: str, - chunks: List[Chunk], - ttl_seconds: Optional[int] = None, - ) -> None: - logger.debug( - f"VectorIORouter.insert_chunks: {vector_db_id}, {len(chunks)} chunks, ttl_seconds={ttl_seconds}, chunk_ids={[chunk.metadata['document_id'] for chunk in chunks[:3]]}{' and more...' if len(chunks) > 3 else ''}", - ) - return await self.routing_table.get_provider_impl(vector_db_id).insert_chunks(vector_db_id, chunks, ttl_seconds) - - async def query_chunks( - self, - vector_db_id: str, - query: InterleavedContent, - params: Optional[Dict[str, Any]] = None, - ) -> QueryChunksResponse: - logger.debug(f"VectorIORouter.query_chunks: {vector_db_id}") - return await self.routing_table.get_provider_impl(vector_db_id).query_chunks(vector_db_id, query, params) - - -class InferenceRouter(Inference): - """Routes to an provider based on the model""" - - def __init__( - self, - routing_table: RoutingTable, - telemetry: Optional[Telemetry] = None, - ) -> None: - logger.debug("Initializing InferenceRouter") - self.routing_table = routing_table - self.telemetry = telemetry - if self.telemetry: - self.tokenizer = Tokenizer.get_instance() - self.formatter = ChatFormat(self.tokenizer) - - async def initialize(self) -> None: - logger.debug("InferenceRouter.initialize") - pass - - async def shutdown(self) -> None: - logger.debug("InferenceRouter.shutdown") - pass - - async def register_model( - self, - model_id: str, - provider_model_id: Optional[str] = None, - provider_id: Optional[str] = None, - metadata: Optional[Dict[str, Any]] = None, - model_type: Optional[ModelType] = None, - ) -> None: - logger.debug( - f"InferenceRouter.register_model: {model_id=} {provider_model_id=} {provider_id=} {metadata=} {model_type=}", - ) - await self.routing_table.register_model(model_id, provider_model_id, provider_id, metadata, model_type) - - def _construct_metrics( - self, - prompt_tokens: int, - completion_tokens: int, - total_tokens: int, - model: Model, - ) -> List[MetricEvent]: - """Constructs a list of MetricEvent objects containing token usage metrics. 
- - Args: - prompt_tokens: Number of tokens in the prompt - completion_tokens: Number of tokens in the completion - total_tokens: Total number of tokens used - model: Model object containing model_id and provider_id - - Returns: - List of MetricEvent objects with token usage metrics - """ - span = get_current_span() - if span is None: - logger.warning("No span found for token usage metrics") - return [] - metrics = [ - ("prompt_tokens", prompt_tokens), - ("completion_tokens", completion_tokens), - ("total_tokens", total_tokens), - ] - metric_events = [] - for metric_name, value in metrics: - metric_events.append( - MetricEvent( - trace_id=span.trace_id, - span_id=span.span_id, - metric=metric_name, - value=value, - timestamp=time.time(), - unit="tokens", - attributes={ - "model_id": model.model_id, - "provider_id": model.provider_id, - }, - ) - ) - return metric_events - - async def _compute_and_log_token_usage( - self, - prompt_tokens: int, - completion_tokens: int, - total_tokens: int, - model: Model, - ) -> List[MetricInResponse]: - metrics = self._construct_metrics(prompt_tokens, completion_tokens, total_tokens, model) - if self.telemetry: - for metric in metrics: - await self.telemetry.log_event(metric) - return [MetricInResponse(metric=metric.metric, value=metric.value) for metric in metrics] - - async def _count_tokens( - self, - messages: List[Message] | InterleavedContent, - tool_prompt_format: Optional[ToolPromptFormat] = None, - ) -> Optional[int]: - if isinstance(messages, list): - encoded = self.formatter.encode_dialog_prompt(messages, tool_prompt_format) - else: - encoded = self.formatter.encode_content(messages) - return len(encoded.tokens) if encoded and encoded.tokens else 0 - - async def chat_completion( - self, - model_id: str, - messages: List[Message], - sampling_params: Optional[SamplingParams] = None, - response_format: Optional[ResponseFormat] = None, - tools: Optional[List[ToolDefinition]] = None, - tool_choice: Optional[ToolChoice] = None, - tool_prompt_format: Optional[ToolPromptFormat] = None, - stream: Optional[bool] = False, - logprobs: Optional[LogProbConfig] = None, - tool_config: Optional[ToolConfig] = None, - ) -> Union[ChatCompletionResponse, AsyncIterator[ChatCompletionResponseStreamChunk]]: - logger.debug( - f"InferenceRouter.chat_completion: {model_id=}, {stream=}, {messages=}, {tools=}, {tool_config=}, {response_format=}", - ) - if sampling_params is None: - sampling_params = SamplingParams() - model = await self.routing_table.get_model(model_id) - if model is None: - raise ValueError(f"Model '{model_id}' not found") - if model.model_type == ModelType.embedding: - raise ValueError(f"Model '{model_id}' is an embedding model and does not support chat completions") - if tool_config: - if tool_choice and tool_choice != tool_config.tool_choice: - raise ValueError("tool_choice and tool_config.tool_choice must match") - if tool_prompt_format and tool_prompt_format != tool_config.tool_prompt_format: - raise ValueError("tool_prompt_format and tool_config.tool_prompt_format must match") - else: - params = {} - if tool_choice: - params["tool_choice"] = tool_choice - if tool_prompt_format: - params["tool_prompt_format"] = tool_prompt_format - tool_config = ToolConfig(**params) - - tools = tools or [] - if tool_config.tool_choice == ToolChoice.none: - tools = [] - elif tool_config.tool_choice == ToolChoice.auto: - pass - elif tool_config.tool_choice == ToolChoice.required: - pass - else: - # verify tool_choice is one of the tools - tool_names = [t.tool_name if 
isinstance(t.tool_name, str) else t.tool_name.value for t in tools] - if tool_config.tool_choice not in tool_names: - raise ValueError(f"Tool choice {tool_config.tool_choice} is not one of the tools: {tool_names}") - - params = dict( - model_id=model_id, - messages=messages, - sampling_params=sampling_params, - tools=tools, - tool_choice=tool_choice, - tool_prompt_format=tool_prompt_format, - response_format=response_format, - stream=stream, - logprobs=logprobs, - tool_config=tool_config, - ) - provider = self.routing_table.get_provider_impl(model_id) - prompt_tokens = await self._count_tokens(messages, tool_config.tool_prompt_format) - - if stream: - - async def stream_generator(): - completion_text = "" - async for chunk in await provider.chat_completion(**params): - if chunk.event.event_type == ChatCompletionResponseEventType.progress: - if chunk.event.delta.type == "text": - completion_text += chunk.event.delta.text - if chunk.event.event_type == ChatCompletionResponseEventType.complete: - completion_tokens = await self._count_tokens( - [ - CompletionMessage( - content=completion_text, - stop_reason=StopReason.end_of_turn, - ) - ], - tool_config.tool_prompt_format, - ) - total_tokens = (prompt_tokens or 0) + (completion_tokens or 0) - metrics = await self._compute_and_log_token_usage( - prompt_tokens or 0, - completion_tokens or 0, - total_tokens, - model, - ) - chunk.metrics = metrics if chunk.metrics is None else chunk.metrics + metrics - yield chunk - - return stream_generator() - else: - response = await provider.chat_completion(**params) - completion_tokens = await self._count_tokens( - [response.completion_message], - tool_config.tool_prompt_format, - ) - total_tokens = (prompt_tokens or 0) + (completion_tokens or 0) - metrics = await self._compute_and_log_token_usage( - prompt_tokens or 0, - completion_tokens or 0, - total_tokens, - model, - ) - response.metrics = metrics if response.metrics is None else response.metrics + metrics - return response - - async def batch_chat_completion( - self, - model_id: str, - messages_batch: List[List[Message]], - tools: Optional[List[ToolDefinition]] = None, - tool_config: Optional[ToolConfig] = None, - sampling_params: Optional[SamplingParams] = None, - response_format: Optional[ResponseFormat] = None, - logprobs: Optional[LogProbConfig] = None, - ) -> BatchChatCompletionResponse: - logger.debug( - f"InferenceRouter.batch_chat_completion: {model_id=}, {len(messages_batch)=}, {sampling_params=}, {response_format=}, {logprobs=}", - ) - provider = self.routing_table.get_provider_impl(model_id) - return await provider.batch_chat_completion( - model_id=model_id, - messages_batch=messages_batch, - tools=tools, - tool_config=tool_config, - sampling_params=sampling_params, - response_format=response_format, - logprobs=logprobs, - ) - - async def completion( - self, - model_id: str, - content: InterleavedContent, - sampling_params: Optional[SamplingParams] = None, - response_format: Optional[ResponseFormat] = None, - stream: Optional[bool] = False, - logprobs: Optional[LogProbConfig] = None, - ) -> AsyncGenerator: - if sampling_params is None: - sampling_params = SamplingParams() - logger.debug( - f"InferenceRouter.completion: {model_id=}, {stream=}, {content=}, {sampling_params=}, {response_format=}", - ) - model = await self.routing_table.get_model(model_id) - if model is None: - raise ValueError(f"Model '{model_id}' not found") - if model.model_type == ModelType.embedding: - raise ValueError(f"Model '{model_id}' is an embedding model and does not 
support chat completions") - provider = self.routing_table.get_provider_impl(model_id) - params = dict( - model_id=model_id, - content=content, - sampling_params=sampling_params, - response_format=response_format, - stream=stream, - logprobs=logprobs, - ) - - prompt_tokens = await self._count_tokens(content) - - if stream: - - async def stream_generator(): - completion_text = "" - async for chunk in await provider.completion(**params): - if hasattr(chunk, "delta"): - completion_text += chunk.delta - if hasattr(chunk, "stop_reason") and chunk.stop_reason and self.telemetry: - completion_tokens = await self._count_tokens(completion_text) - total_tokens = (prompt_tokens or 0) + (completion_tokens or 0) - metrics = await self._compute_and_log_token_usage( - prompt_tokens or 0, - completion_tokens or 0, - total_tokens, - model, - ) - chunk.metrics = metrics if chunk.metrics is None else chunk.metrics + metrics - yield chunk - - return stream_generator() - else: - response = await provider.completion(**params) - completion_tokens = await self._count_tokens(response.content) - total_tokens = (prompt_tokens or 0) + (completion_tokens or 0) - metrics = await self._compute_and_log_token_usage( - prompt_tokens or 0, - completion_tokens or 0, - total_tokens, - model, - ) - response.metrics = metrics if response.metrics is None else response.metrics + metrics - return response - - async def batch_completion( - self, - model_id: str, - content_batch: List[InterleavedContent], - sampling_params: Optional[SamplingParams] = None, - response_format: Optional[ResponseFormat] = None, - logprobs: Optional[LogProbConfig] = None, - ) -> BatchCompletionResponse: - logger.debug( - f"InferenceRouter.batch_completion: {model_id=}, {len(content_batch)=}, {sampling_params=}, {response_format=}, {logprobs=}", - ) - provider = self.routing_table.get_provider_impl(model_id) - return await provider.batch_completion(model_id, content_batch, sampling_params, response_format, logprobs) - - async def embeddings( - self, - model_id: str, - contents: List[str] | List[InterleavedContentItem], - text_truncation: Optional[TextTruncation] = TextTruncation.none, - output_dimension: Optional[int] = None, - task_type: Optional[EmbeddingTaskType] = None, - ) -> EmbeddingsResponse: - logger.debug(f"InferenceRouter.embeddings: {model_id}") - model = await self.routing_table.get_model(model_id) - if model is None: - raise ValueError(f"Model '{model_id}' not found") - if model.model_type == ModelType.llm: - raise ValueError(f"Model '{model_id}' is an LLM model and does not support embeddings") - return await self.routing_table.get_provider_impl(model_id).embeddings( - model_id=model_id, - contents=contents, - text_truncation=text_truncation, - output_dimension=output_dimension, - task_type=task_type, - ) - - async def openai_completion( - self, - model: str, - prompt: Union[str, List[str], List[int], List[List[int]]], - best_of: Optional[int] = None, - echo: Optional[bool] = None, - frequency_penalty: Optional[float] = None, - logit_bias: Optional[Dict[str, float]] = None, - logprobs: Optional[bool] = None, - max_tokens: Optional[int] = None, - n: Optional[int] = None, - presence_penalty: Optional[float] = None, - seed: Optional[int] = None, - stop: Optional[Union[str, List[str]]] = None, - stream: Optional[bool] = None, - stream_options: Optional[Dict[str, Any]] = None, - temperature: Optional[float] = None, - top_p: Optional[float] = None, - user: Optional[str] = None, - guided_choice: Optional[List[str]] = None, - prompt_logprobs: 
Optional[int] = None, - ) -> OpenAICompletion: - logger.debug( - f"InferenceRouter.openai_completion: {model=}, {stream=}, {prompt=}", - ) - model_obj = await self.routing_table.get_model(model) - if model_obj is None: - raise ValueError(f"Model '{model}' not found") - if model_obj.model_type == ModelType.embedding: - raise ValueError(f"Model '{model}' is an embedding model and does not support completions") - - params = dict( - model=model_obj.identifier, - prompt=prompt, - best_of=best_of, - echo=echo, - frequency_penalty=frequency_penalty, - logit_bias=logit_bias, - logprobs=logprobs, - max_tokens=max_tokens, - n=n, - presence_penalty=presence_penalty, - seed=seed, - stop=stop, - stream=stream, - stream_options=stream_options, - temperature=temperature, - top_p=top_p, - user=user, - guided_choice=guided_choice, - prompt_logprobs=prompt_logprobs, - ) - - provider = self.routing_table.get_provider_impl(model_obj.identifier) - return await provider.openai_completion(**params) - - async def openai_chat_completion( - self, - model: str, - messages: List[OpenAIMessageParam], - frequency_penalty: Optional[float] = None, - function_call: Optional[Union[str, Dict[str, Any]]] = None, - functions: Optional[List[Dict[str, Any]]] = None, - logit_bias: Optional[Dict[str, float]] = None, - logprobs: Optional[bool] = None, - max_completion_tokens: Optional[int] = None, - max_tokens: Optional[int] = None, - n: Optional[int] = None, - parallel_tool_calls: Optional[bool] = None, - presence_penalty: Optional[float] = None, - response_format: Optional[Dict[str, str]] = None, - seed: Optional[int] = None, - stop: Optional[Union[str, List[str]]] = None, - stream: Optional[bool] = None, - stream_options: Optional[Dict[str, Any]] = None, - temperature: Optional[float] = None, - tool_choice: Optional[Union[str, Dict[str, Any]]] = None, - tools: Optional[List[Dict[str, Any]]] = None, - top_logprobs: Optional[int] = None, - top_p: Optional[float] = None, - user: Optional[str] = None, - ) -> OpenAIChatCompletion: - logger.debug( - f"InferenceRouter.openai_chat_completion: {model=}, {stream=}, {messages=}", - ) - model_obj = await self.routing_table.get_model(model) - if model_obj is None: - raise ValueError(f"Model '{model}' not found") - if model_obj.model_type == ModelType.embedding: - raise ValueError(f"Model '{model}' is an embedding model and does not support chat completions") - - params = dict( - model=model_obj.identifier, - messages=messages, - frequency_penalty=frequency_penalty, - function_call=function_call, - functions=functions, - logit_bias=logit_bias, - logprobs=logprobs, - max_completion_tokens=max_completion_tokens, - max_tokens=max_tokens, - n=n, - parallel_tool_calls=parallel_tool_calls, - presence_penalty=presence_penalty, - response_format=response_format, - seed=seed, - stop=stop, - stream=stream, - stream_options=stream_options, - temperature=temperature, - tool_choice=tool_choice, - tools=tools, - top_logprobs=top_logprobs, - top_p=top_p, - user=user, - ) - - provider = self.routing_table.get_provider_impl(model_obj.identifier) - return await provider.openai_chat_completion(**params) - - -class SafetyRouter(Safety): - def __init__( - self, - routing_table: RoutingTable, - ) -> None: - logger.debug("Initializing SafetyRouter") - self.routing_table = routing_table - - async def initialize(self) -> None: - logger.debug("SafetyRouter.initialize") - pass - - async def shutdown(self) -> None: - logger.debug("SafetyRouter.shutdown") - pass - - async def register_shield( - self, - shield_id: str, - 
provider_shield_id: Optional[str] = None, - provider_id: Optional[str] = None, - params: Optional[Dict[str, Any]] = None, - ) -> Shield: - logger.debug(f"SafetyRouter.register_shield: {shield_id}") - return await self.routing_table.register_shield(shield_id, provider_shield_id, provider_id, params) - - async def run_shield( - self, - shield_id: str, - messages: List[Message], - params: Dict[str, Any] = None, - ) -> RunShieldResponse: - logger.debug(f"SafetyRouter.run_shield: {shield_id}") - return await self.routing_table.get_provider_impl(shield_id).run_shield( - shield_id=shield_id, - messages=messages, - params=params, - ) - - -class DatasetIORouter(DatasetIO): - def __init__( - self, - routing_table: RoutingTable, - ) -> None: - logger.debug("Initializing DatasetIORouter") - self.routing_table = routing_table - - async def initialize(self) -> None: - logger.debug("DatasetIORouter.initialize") - pass - - async def shutdown(self) -> None: - logger.debug("DatasetIORouter.shutdown") - pass - - async def register_dataset( - self, - purpose: DatasetPurpose, - source: DataSource, - metadata: Optional[Dict[str, Any]] = None, - dataset_id: Optional[str] = None, - ) -> None: - logger.debug( - f"DatasetIORouter.register_dataset: {purpose=} {source=} {metadata=} {dataset_id=}", - ) - await self.routing_table.register_dataset( - purpose=purpose, - source=source, - metadata=metadata, - dataset_id=dataset_id, - ) - - async def iterrows( - self, - dataset_id: str, - start_index: Optional[int] = None, - limit: Optional[int] = None, - ) -> PaginatedResponse: - logger.debug( - f"DatasetIORouter.iterrows: {dataset_id}, {start_index=} {limit=}", - ) - return await self.routing_table.get_provider_impl(dataset_id).iterrows( - dataset_id=dataset_id, - start_index=start_index, - limit=limit, - ) - - async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None: - logger.debug(f"DatasetIORouter.append_rows: {dataset_id}, {len(rows)} rows") - return await self.routing_table.get_provider_impl(dataset_id).append_rows( - dataset_id=dataset_id, - rows=rows, - ) - - -class ScoringRouter(Scoring): - def __init__( - self, - routing_table: RoutingTable, - ) -> None: - logger.debug("Initializing ScoringRouter") - self.routing_table = routing_table - - async def initialize(self) -> None: - logger.debug("ScoringRouter.initialize") - pass - - async def shutdown(self) -> None: - logger.debug("ScoringRouter.shutdown") - pass - - async def score_batch( - self, - dataset_id: str, - scoring_functions: Dict[str, Optional[ScoringFnParams]] = None, - save_results_dataset: bool = False, - ) -> ScoreBatchResponse: - logger.debug(f"ScoringRouter.score_batch: {dataset_id}") - res = {} - for fn_identifier in scoring_functions.keys(): - score_response = await self.routing_table.get_provider_impl(fn_identifier).score_batch( - dataset_id=dataset_id, - scoring_functions={fn_identifier: scoring_functions[fn_identifier]}, - ) - res.update(score_response.results) - - if save_results_dataset: - raise NotImplementedError("Save results dataset not implemented yet") - - return ScoreBatchResponse( - results=res, - ) - - async def score( - self, - input_rows: List[Dict[str, Any]], - scoring_functions: Dict[str, Optional[ScoringFnParams]] = None, - ) -> ScoreResponse: - logger.debug(f"ScoringRouter.score: {len(input_rows)} rows, {len(scoring_functions)} functions") - res = {} - # look up and map each scoring function to its provider impl - for fn_identifier in scoring_functions.keys(): - score_response = await 
self.routing_table.get_provider_impl(fn_identifier).score( - input_rows=input_rows, - scoring_functions={fn_identifier: scoring_functions[fn_identifier]}, - ) - res.update(score_response.results) - - return ScoreResponse(results=res) - - -class EvalRouter(Eval): - def __init__( - self, - routing_table: RoutingTable, - ) -> None: - logger.debug("Initializing EvalRouter") - self.routing_table = routing_table - - async def initialize(self) -> None: - logger.debug("EvalRouter.initialize") - pass - - async def shutdown(self) -> None: - logger.debug("EvalRouter.shutdown") - pass - - async def run_eval( - self, - benchmark_id: str, - benchmark_config: BenchmarkConfig, - ) -> Job: - logger.debug(f"EvalRouter.run_eval: {benchmark_id}") - return await self.routing_table.get_provider_impl(benchmark_id).run_eval( - benchmark_id=benchmark_id, - benchmark_config=benchmark_config, - ) - - async def evaluate_rows( - self, - benchmark_id: str, - input_rows: List[Dict[str, Any]], - scoring_functions: List[str], - benchmark_config: BenchmarkConfig, - ) -> EvaluateResponse: - logger.debug(f"EvalRouter.evaluate_rows: {benchmark_id}, {len(input_rows)} rows") - return await self.routing_table.get_provider_impl(benchmark_id).evaluate_rows( - benchmark_id=benchmark_id, - input_rows=input_rows, - scoring_functions=scoring_functions, - benchmark_config=benchmark_config, - ) - - async def job_status( - self, - benchmark_id: str, - job_id: str, - ) -> Job: - logger.debug(f"EvalRouter.job_status: {benchmark_id}, {job_id}") - return await self.routing_table.get_provider_impl(benchmark_id).job_status(benchmark_id, job_id) - - async def job_cancel( - self, - benchmark_id: str, - job_id: str, - ) -> None: - logger.debug(f"EvalRouter.job_cancel: {benchmark_id}, {job_id}") - await self.routing_table.get_provider_impl(benchmark_id).job_cancel( - benchmark_id, - job_id, - ) - - async def job_result( - self, - benchmark_id: str, - job_id: str, - ) -> EvaluateResponse: - logger.debug(f"EvalRouter.job_result: {benchmark_id}, {job_id}") - return await self.routing_table.get_provider_impl(benchmark_id).job_result( - benchmark_id, - job_id, - ) - - -class ToolRuntimeRouter(ToolRuntime): - class RagToolImpl(RAGToolRuntime): - def __init__( - self, - routing_table: RoutingTable, - ) -> None: - logger.debug("Initializing ToolRuntimeRouter.RagToolImpl") - self.routing_table = routing_table - - async def query( - self, - content: InterleavedContent, - vector_db_ids: List[str], - query_config: Optional[RAGQueryConfig] = None, - ) -> RAGQueryResult: - logger.debug(f"ToolRuntimeRouter.RagToolImpl.query: {vector_db_ids}") - return await self.routing_table.get_provider_impl("knowledge_search").query( - content, vector_db_ids, query_config - ) - - async def insert( - self, - documents: List[RAGDocument], - vector_db_id: str, - chunk_size_in_tokens: int = 512, - ) -> None: - logger.debug( - f"ToolRuntimeRouter.RagToolImpl.insert: {vector_db_id}, {len(documents)} documents, chunk_size={chunk_size_in_tokens}" - ) - return await self.routing_table.get_provider_impl("insert_into_memory").insert( - documents, vector_db_id, chunk_size_in_tokens - ) - - def __init__( - self, - routing_table: RoutingTable, - ) -> None: - logger.debug("Initializing ToolRuntimeRouter") - self.routing_table = routing_table - - # HACK ALERT this should be in sync with "get_all_api_endpoints()" - self.rag_tool = self.RagToolImpl(routing_table) - for method in ("query", "insert"): - setattr(self, f"rag_tool.{method}", getattr(self.rag_tool, method)) - - async def 
initialize(self) -> None: - logger.debug("ToolRuntimeRouter.initialize") - pass - - async def shutdown(self) -> None: - logger.debug("ToolRuntimeRouter.shutdown") - pass - - async def invoke_tool(self, tool_name: str, kwargs: Dict[str, Any]) -> Any: - logger.debug(f"ToolRuntimeRouter.invoke_tool: {tool_name}") - return await self.routing_table.get_provider_impl(tool_name).invoke_tool( - tool_name=tool_name, - kwargs=kwargs, - ) - - async def list_runtime_tools( - self, tool_group_id: Optional[str] = None, mcp_endpoint: Optional[URL] = None - ) -> ListToolDefsResponse: - logger.debug(f"ToolRuntimeRouter.list_runtime_tools: {tool_group_id}") - return await self.routing_table.get_provider_impl(tool_group_id).list_tools(tool_group_id, mcp_endpoint) diff --git a/llama_stack/distribution/routers/routing_tables.py b/llama_stack/distribution/routers/routing_tables.py deleted file mode 100644 index 18b0c891f..000000000 --- a/llama_stack/distribution/routers/routing_tables.py +++ /dev/null @@ -1,631 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import logging -import time -import uuid -from typing import Any, Dict, List, Optional - -from pydantic import TypeAdapter - -from llama_stack.apis.benchmarks import Benchmark, Benchmarks, ListBenchmarksResponse -from llama_stack.apis.common.content_types import URL -from llama_stack.apis.common.type_system import ParamType -from llama_stack.apis.datasets import ( - Dataset, - DatasetPurpose, - Datasets, - DatasetType, - DataSource, - ListDatasetsResponse, - RowsDataSource, - URIDataSource, -) -from llama_stack.apis.models import ListModelsResponse, Model, Models, ModelType, OpenAIListModelsResponse, OpenAIModel -from llama_stack.apis.resource import ResourceType -from llama_stack.apis.scoring_functions import ( - ListScoringFunctionsResponse, - ScoringFn, - ScoringFnParams, - ScoringFunctions, -) -from llama_stack.apis.shields import ListShieldsResponse, Shield, Shields -from llama_stack.apis.tools import ( - ListToolGroupsResponse, - ListToolsResponse, - Tool, - ToolGroup, - ToolGroups, - ToolHost, -) -from llama_stack.apis.vector_dbs import ListVectorDBsResponse, VectorDB, VectorDBs -from llama_stack.distribution.access_control import check_access -from llama_stack.distribution.datatypes import ( - AccessAttributes, - BenchmarkWithACL, - DatasetWithACL, - ModelWithACL, - RoutableObject, - RoutableObjectWithProvider, - RoutedProtocol, - ScoringFnWithACL, - ShieldWithACL, - ToolGroupWithACL, - ToolWithACL, - VectorDBWithACL, -) -from llama_stack.distribution.request_headers import get_auth_attributes -from llama_stack.distribution.store import DistributionRegistry -from llama_stack.providers.datatypes import Api, RoutingTable - -logger = logging.getLogger(__name__) - - -def get_impl_api(p: Any) -> Api: - return p.__provider_spec__.api - - -# TODO: this should return the registered object for all APIs -async def register_object_with_provider(obj: RoutableObject, p: Any) -> RoutableObject: - api = get_impl_api(p) - - assert obj.provider_id != "remote", "Remote provider should not be registered" - - if api == Api.inference: - return await p.register_model(obj) - elif api == Api.safety: - return await p.register_shield(obj) - elif api == Api.vector_io: - return await p.register_vector_db(obj) - elif api == Api.datasetio: - return await p.register_dataset(obj) - elif api == Api.scoring: - return 
await p.register_scoring_function(obj) - elif api == Api.eval: - return await p.register_benchmark(obj) - elif api == Api.tool_runtime: - return await p.register_tool(obj) - else: - raise ValueError(f"Unknown API {api} for registering object with provider") - - -async def unregister_object_from_provider(obj: RoutableObject, p: Any) -> None: - api = get_impl_api(p) - if api == Api.vector_io: - return await p.unregister_vector_db(obj.identifier) - elif api == Api.inference: - return await p.unregister_model(obj.identifier) - elif api == Api.datasetio: - return await p.unregister_dataset(obj.identifier) - elif api == Api.tool_runtime: - return await p.unregister_tool(obj.identifier) - else: - raise ValueError(f"Unregister not supported for {api}") - - -Registry = Dict[str, List[RoutableObjectWithProvider]] - - -class CommonRoutingTableImpl(RoutingTable): - def __init__( - self, - impls_by_provider_id: Dict[str, RoutedProtocol], - dist_registry: DistributionRegistry, - ) -> None: - self.impls_by_provider_id = impls_by_provider_id - self.dist_registry = dist_registry - - async def initialize(self) -> None: - async def add_objects(objs: List[RoutableObjectWithProvider], provider_id: str, cls) -> None: - for obj in objs: - if cls is None: - obj.provider_id = provider_id - else: - # Create a copy of the model data and explicitly set provider_id - model_data = obj.model_dump() - model_data["provider_id"] = provider_id - obj = cls(**model_data) - await self.dist_registry.register(obj) - - # Register all objects from providers - for pid, p in self.impls_by_provider_id.items(): - api = get_impl_api(p) - if api == Api.inference: - p.model_store = self - elif api == Api.safety: - p.shield_store = self - elif api == Api.vector_io: - p.vector_db_store = self - elif api == Api.datasetio: - p.dataset_store = self - elif api == Api.scoring: - p.scoring_function_store = self - scoring_functions = await p.list_scoring_functions() - await add_objects(scoring_functions, pid, ScoringFn) - elif api == Api.eval: - p.benchmark_store = self - elif api == Api.tool_runtime: - p.tool_store = self - - async def shutdown(self) -> None: - for p in self.impls_by_provider_id.values(): - await p.shutdown() - - def get_provider_impl(self, routing_key: str, provider_id: Optional[str] = None) -> Any: - def apiname_object(): - if isinstance(self, ModelsRoutingTable): - return ("Inference", "model") - elif isinstance(self, ShieldsRoutingTable): - return ("Safety", "shield") - elif isinstance(self, VectorDBsRoutingTable): - return ("VectorIO", "vector_db") - elif isinstance(self, DatasetsRoutingTable): - return ("DatasetIO", "dataset") - elif isinstance(self, ScoringFunctionsRoutingTable): - return ("Scoring", "scoring_function") - elif isinstance(self, BenchmarksRoutingTable): - return ("Eval", "benchmark") - elif isinstance(self, ToolGroupsRoutingTable): - return ("Tools", "tool") - else: - raise ValueError("Unknown routing table type") - - apiname, objtype = apiname_object() - - # Get objects from disk registry - obj = self.dist_registry.get_cached(objtype, routing_key) - if not obj: - provider_ids = list(self.impls_by_provider_id.keys()) - if len(provider_ids) > 1: - provider_ids_str = f"any of the providers: {', '.join(provider_ids)}" - else: - provider_ids_str = f"provider: `{provider_ids[0]}`" - raise ValueError( - f"{objtype.capitalize()} `{routing_key}` not served by {provider_ids_str}. Make sure there is an {apiname} provider serving this {objtype}." 
- ) - - if not provider_id or provider_id == obj.provider_id: - return self.impls_by_provider_id[obj.provider_id] - - raise ValueError(f"Provider not found for `{routing_key}`") - - async def get_object_by_identifier(self, type: str, identifier: str) -> Optional[RoutableObjectWithProvider]: - # Get from disk registry - obj = await self.dist_registry.get(type, identifier) - if not obj: - return None - - # Check if user has permission to access this object - if not check_access(obj.identifier, getattr(obj, "access_attributes", None), get_auth_attributes()): - logger.debug(f"Access denied to {type} '{identifier}' based on attribute mismatch") - return None - - return obj - - async def unregister_object(self, obj: RoutableObjectWithProvider) -> None: - await self.dist_registry.delete(obj.type, obj.identifier) - await unregister_object_from_provider(obj, self.impls_by_provider_id[obj.provider_id]) - - async def register_object(self, obj: RoutableObjectWithProvider) -> RoutableObjectWithProvider: - # if provider_id is not specified, pick an arbitrary one from existing entries - if not obj.provider_id and len(self.impls_by_provider_id) > 0: - obj.provider_id = list(self.impls_by_provider_id.keys())[0] - - if obj.provider_id not in self.impls_by_provider_id: - raise ValueError(f"Provider `{obj.provider_id}` not found") - - p = self.impls_by_provider_id[obj.provider_id] - - # If object supports access control but no attributes set, use creator's attributes - if not obj.access_attributes: - creator_attributes = get_auth_attributes() - if creator_attributes: - obj.access_attributes = AccessAttributes(**creator_attributes) - logger.info(f"Setting access attributes for {obj.type} '{obj.identifier}' based on creator's identity") - - registered_obj = await register_object_with_provider(obj, p) - # TODO: This needs to be fixed for all APIs once they return the registered object - if obj.type == ResourceType.model.value: - await self.dist_registry.register(registered_obj) - return registered_obj - - else: - await self.dist_registry.register(obj) - return obj - - async def get_all_with_type(self, type: str) -> List[RoutableObjectWithProvider]: - objs = await self.dist_registry.get_all() - filtered_objs = [obj for obj in objs if obj.type == type] - - # Apply attribute-based access control filtering - if filtered_objs: - filtered_objs = [ - obj - for obj in filtered_objs - if check_access(obj.identifier, getattr(obj, "access_attributes", None), get_auth_attributes()) - ] - - return filtered_objs - - -class ModelsRoutingTable(CommonRoutingTableImpl, Models): - async def list_models(self) -> ListModelsResponse: - return ListModelsResponse(data=await self.get_all_with_type("model")) - - async def openai_list_models(self) -> OpenAIListModelsResponse: - models = await self.get_all_with_type("model") - openai_models = [ - OpenAIModel( - id=model.identifier, - object="model", - created=int(time.time()), - owned_by="llama_stack", - ) - for model in models - ] - return OpenAIListModelsResponse(data=openai_models) - - async def get_model(self, model_id: str) -> Model: - model = await self.get_object_by_identifier("model", model_id) - if model is None: - raise ValueError(f"Model '{model_id}' not found") - return model - - async def register_model( - self, - model_id: str, - provider_model_id: Optional[str] = None, - provider_id: Optional[str] = None, - metadata: Optional[Dict[str, Any]] = None, - model_type: Optional[ModelType] = None, - ) -> Model: - if provider_model_id is None: - provider_model_id = model_id - if 
provider_id is None: - # If provider_id not specified, use the only provider if it supports this model - if len(self.impls_by_provider_id) == 1: - provider_id = list(self.impls_by_provider_id.keys())[0] - else: - raise ValueError( - f"No provider specified and multiple providers available. Please specify a provider_id. Available providers: {self.impls_by_provider_id.keys()}" - ) - if metadata is None: - metadata = {} - if model_type is None: - model_type = ModelType.llm - if "embedding_dimension" not in metadata and model_type == ModelType.embedding: - raise ValueError("Embedding model must have an embedding dimension in its metadata") - model = ModelWithACL( - identifier=model_id, - provider_resource_id=provider_model_id, - provider_id=provider_id, - metadata=metadata, - model_type=model_type, - ) - registered_model = await self.register_object(model) - return registered_model - - async def unregister_model(self, model_id: str) -> None: - existing_model = await self.get_model(model_id) - if existing_model is None: - raise ValueError(f"Model {model_id} not found") - await self.unregister_object(existing_model) - - -class ShieldsRoutingTable(CommonRoutingTableImpl, Shields): - async def list_shields(self) -> ListShieldsResponse: - return ListShieldsResponse(data=await self.get_all_with_type(ResourceType.shield.value)) - - async def get_shield(self, identifier: str) -> Shield: - shield = await self.get_object_by_identifier("shield", identifier) - if shield is None: - raise ValueError(f"Shield '{identifier}' not found") - return shield - - async def register_shield( - self, - shield_id: str, - provider_shield_id: Optional[str] = None, - provider_id: Optional[str] = None, - params: Optional[Dict[str, Any]] = None, - ) -> Shield: - if provider_shield_id is None: - provider_shield_id = shield_id - if provider_id is None: - # If provider_id not specified, use the only provider if it supports this shield type - if len(self.impls_by_provider_id) == 1: - provider_id = list(self.impls_by_provider_id.keys())[0] - else: - raise ValueError( - "No provider specified and multiple providers available. Please specify a provider_id." - ) - if params is None: - params = {} - shield = ShieldWithACL( - identifier=shield_id, - provider_resource_id=provider_shield_id, - provider_id=provider_id, - params=params, - ) - await self.register_object(shield) - return shield - - -class VectorDBsRoutingTable(CommonRoutingTableImpl, VectorDBs): - async def list_vector_dbs(self) -> ListVectorDBsResponse: - return ListVectorDBsResponse(data=await self.get_all_with_type("vector_db")) - - async def get_vector_db(self, vector_db_id: str) -> VectorDB: - vector_db = await self.get_object_by_identifier("vector_db", vector_db_id) - if vector_db is None: - raise ValueError(f"Vector DB '{vector_db_id}' not found") - return vector_db - - async def register_vector_db( - self, - vector_db_id: str, - embedding_model: str, - embedding_dimension: Optional[int] = 384, - provider_id: Optional[str] = None, - provider_vector_db_id: Optional[str] = None, - ) -> VectorDB: - if provider_vector_db_id is None: - provider_vector_db_id = vector_db_id - if provider_id is None: - if len(self.impls_by_provider_id) > 0: - provider_id = list(self.impls_by_provider_id.keys())[0] - if len(self.impls_by_provider_id) > 1: - logger.warning( - f"No provider specified and multiple providers available. Arbitrarily selected the first provider {provider_id}." - ) - else: - raise ValueError("No provider available. 
Please configure a vector_io provider.") - model = await self.get_object_by_identifier("model", embedding_model) - if model is None: - raise ValueError(f"Model {embedding_model} not found") - if model.model_type != ModelType.embedding: - raise ValueError(f"Model {embedding_model} is not an embedding model") - if "embedding_dimension" not in model.metadata: - raise ValueError(f"Model {embedding_model} does not have an embedding dimension") - vector_db_data = { - "identifier": vector_db_id, - "type": ResourceType.vector_db.value, - "provider_id": provider_id, - "provider_resource_id": provider_vector_db_id, - "embedding_model": embedding_model, - "embedding_dimension": model.metadata["embedding_dimension"], - } - vector_db = TypeAdapter(VectorDBWithACL).validate_python(vector_db_data) - await self.register_object(vector_db) - return vector_db - - async def unregister_vector_db(self, vector_db_id: str) -> None: - existing_vector_db = await self.get_vector_db(vector_db_id) - if existing_vector_db is None: - raise ValueError(f"Vector DB {vector_db_id} not found") - await self.unregister_object(existing_vector_db) - - -class DatasetsRoutingTable(CommonRoutingTableImpl, Datasets): - async def list_datasets(self) -> ListDatasetsResponse: - return ListDatasetsResponse(data=await self.get_all_with_type(ResourceType.dataset.value)) - - async def get_dataset(self, dataset_id: str) -> Dataset: - dataset = await self.get_object_by_identifier("dataset", dataset_id) - if dataset is None: - raise ValueError(f"Dataset '{dataset_id}' not found") - return dataset - - async def register_dataset( - self, - purpose: DatasetPurpose, - source: DataSource, - metadata: Optional[Dict[str, Any]] = None, - dataset_id: Optional[str] = None, - ) -> Dataset: - if isinstance(source, dict): - if source["type"] == "uri": - source = URIDataSource.parse_obj(source) - elif source["type"] == "rows": - source = RowsDataSource.parse_obj(source) - - if not dataset_id: - dataset_id = f"dataset-{str(uuid.uuid4())}" - - provider_dataset_id = dataset_id - - # infer provider from source - if source.type == DatasetType.rows.value: - provider_id = "localfs" - elif source.type == DatasetType.uri.value: - # infer provider from uri - if source.uri.startswith("huggingface"): - provider_id = "huggingface" - else: - provider_id = "localfs" - else: - raise ValueError(f"Unknown data source type: {source.type}") - - if metadata is None: - metadata = {} - - dataset = DatasetWithACL( - identifier=dataset_id, - provider_resource_id=provider_dataset_id, - provider_id=provider_id, - purpose=purpose, - source=source, - metadata=metadata, - ) - - await self.register_object(dataset) - return dataset - - async def unregister_dataset(self, dataset_id: str) -> None: - dataset = await self.get_dataset(dataset_id) - if dataset is None: - raise ValueError(f"Dataset {dataset_id} not found") - await self.unregister_object(dataset) - - -class ScoringFunctionsRoutingTable(CommonRoutingTableImpl, ScoringFunctions): - async def list_scoring_functions(self) -> ListScoringFunctionsResponse: - return ListScoringFunctionsResponse(data=await self.get_all_with_type(ResourceType.scoring_function.value)) - - async def get_scoring_function(self, scoring_fn_id: str) -> ScoringFn: - scoring_fn = await self.get_object_by_identifier("scoring_function", scoring_fn_id) - if scoring_fn is None: - raise ValueError(f"Scoring function '{scoring_fn_id}' not found") - return scoring_fn - - async def register_scoring_function( - self, - scoring_fn_id: str, - description: str, - 
return_type: ParamType, - provider_scoring_fn_id: Optional[str] = None, - provider_id: Optional[str] = None, - params: Optional[ScoringFnParams] = None, - ) -> None: - if provider_scoring_fn_id is None: - provider_scoring_fn_id = scoring_fn_id - if provider_id is None: - if len(self.impls_by_provider_id) == 1: - provider_id = list(self.impls_by_provider_id.keys())[0] - else: - raise ValueError( - "No provider specified and multiple providers available. Please specify a provider_id." - ) - scoring_fn = ScoringFnWithACL( - identifier=scoring_fn_id, - description=description, - return_type=return_type, - provider_resource_id=provider_scoring_fn_id, - provider_id=provider_id, - params=params, - ) - scoring_fn.provider_id = provider_id - await self.register_object(scoring_fn) - - -class BenchmarksRoutingTable(CommonRoutingTableImpl, Benchmarks): - async def list_benchmarks(self) -> ListBenchmarksResponse: - return ListBenchmarksResponse(data=await self.get_all_with_type("benchmark")) - - async def get_benchmark(self, benchmark_id: str) -> Benchmark: - benchmark = await self.get_object_by_identifier("benchmark", benchmark_id) - if benchmark is None: - raise ValueError(f"Benchmark '{benchmark_id}' not found") - return benchmark - - async def register_benchmark( - self, - benchmark_id: str, - dataset_id: str, - scoring_functions: List[str], - metadata: Optional[Dict[str, Any]] = None, - provider_benchmark_id: Optional[str] = None, - provider_id: Optional[str] = None, - ) -> None: - if metadata is None: - metadata = {} - if provider_id is None: - if len(self.impls_by_provider_id) == 1: - provider_id = list(self.impls_by_provider_id.keys())[0] - else: - raise ValueError( - "No provider specified and multiple providers available. Please specify a provider_id." 
- ) - if provider_benchmark_id is None: - provider_benchmark_id = benchmark_id - benchmark = BenchmarkWithACL( - identifier=benchmark_id, - dataset_id=dataset_id, - scoring_functions=scoring_functions, - metadata=metadata, - provider_id=provider_id, - provider_resource_id=provider_benchmark_id, - ) - await self.register_object(benchmark) - - -class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups): - async def list_tools(self, toolgroup_id: Optional[str] = None) -> ListToolsResponse: - tools = await self.get_all_with_type("tool") - if toolgroup_id: - tools = [tool for tool in tools if tool.toolgroup_id == toolgroup_id] - return ListToolsResponse(data=tools) - - async def list_tool_groups(self) -> ListToolGroupsResponse: - return ListToolGroupsResponse(data=await self.get_all_with_type("tool_group")) - - async def get_tool_group(self, toolgroup_id: str) -> ToolGroup: - tool_group = await self.get_object_by_identifier("tool_group", toolgroup_id) - if tool_group is None: - raise ValueError(f"Tool group '{toolgroup_id}' not found") - return tool_group - - async def get_tool(self, tool_name: str) -> Tool: - return await self.get_object_by_identifier("tool", tool_name) - - async def register_tool_group( - self, - toolgroup_id: str, - provider_id: str, - mcp_endpoint: Optional[URL] = None, - args: Optional[Dict[str, Any]] = None, - ) -> None: - tools = [] - tool_defs = await self.impls_by_provider_id[provider_id].list_runtime_tools(toolgroup_id, mcp_endpoint) - tool_host = ToolHost.model_context_protocol if mcp_endpoint else ToolHost.distribution - - for tool_def in tool_defs.data: - tools.append( - ToolWithACL( - identifier=tool_def.name, - toolgroup_id=toolgroup_id, - description=tool_def.description or "", - parameters=tool_def.parameters or [], - provider_id=provider_id, - provider_resource_id=tool_def.name, - metadata=tool_def.metadata, - tool_host=tool_host, - ) - ) - for tool in tools: - existing_tool = await self.get_tool(tool.identifier) - # Compare existing and new object if one exists - if existing_tool: - existing_dict = existing_tool.model_dump() - new_dict = tool.model_dump() - - if existing_dict != new_dict: - raise ValueError( - f"Object {tool.identifier} already exists in registry. Please use a different identifier." - ) - await self.register_object(tool) - - await self.dist_registry.register( - ToolGroupWithACL( - identifier=toolgroup_id, - provider_id=provider_id, - provider_resource_id=toolgroup_id, - mcp_endpoint=mcp_endpoint, - args=args, - ) - ) - - async def unregister_toolgroup(self, toolgroup_id: str) -> None: - tool_group = await self.get_tool_group(toolgroup_id) - if tool_group is None: - raise ValueError(f"Tool group {toolgroup_id} not found") - tools = await self.list_tools(toolgroup_id) - for tool in getattr(tools, "data", []): - await self.unregister_object(tool) - await self.unregister_object(tool_group) - - async def shutdown(self) -> None: - pass diff --git a/llama_stack/distribution/routers/safety.py b/llama_stack/distribution/routers/safety.py new file mode 100644 index 000000000..9761d2db0 --- /dev/null +++ b/llama_stack/distribution/routers/safety.py @@ -0,0 +1,57 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from typing import Any + +from llama_stack.apis.inference import ( + Message, +) +from llama_stack.apis.safety import RunShieldResponse, Safety +from llama_stack.apis.shields import Shield +from llama_stack.log import get_logger +from llama_stack.providers.datatypes import RoutingTable + +logger = get_logger(name=__name__, category="core") + + +class SafetyRouter(Safety): + def __init__( + self, + routing_table: RoutingTable, + ) -> None: + logger.debug("Initializing SafetyRouter") + self.routing_table = routing_table + + async def initialize(self) -> None: + logger.debug("SafetyRouter.initialize") + pass + + async def shutdown(self) -> None: + logger.debug("SafetyRouter.shutdown") + pass + + async def register_shield( + self, + shield_id: str, + provider_shield_id: str | None = None, + provider_id: str | None = None, + params: dict[str, Any] | None = None, + ) -> Shield: + logger.debug(f"SafetyRouter.register_shield: {shield_id}") + return await self.routing_table.register_shield(shield_id, provider_shield_id, provider_id, params) + + async def run_shield( + self, + shield_id: str, + messages: list[Message], + params: dict[str, Any] = None, + ) -> RunShieldResponse: + logger.debug(f"SafetyRouter.run_shield: {shield_id}") + return await self.routing_table.get_provider_impl(shield_id).run_shield( + shield_id=shield_id, + messages=messages, + params=params, + ) diff --git a/llama_stack/distribution/routers/tool_runtime.py b/llama_stack/distribution/routers/tool_runtime.py new file mode 100644 index 000000000..285843dbc --- /dev/null +++ b/llama_stack/distribution/routers/tool_runtime.py @@ -0,0 +1,92 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
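# Illustrative usage sketch for the SafetyRouter introduced above (not part of this
# patch). Assumptions: `safety` is an initialized SafetyRouter whose routing table
# already has a shield registered under the hypothetical id "content-safety", and
# RunShieldResponse.violation is None when nothing is flagged.
from llama_stack.apis.inference import UserMessage

async def is_allowed(safety, text: str) -> bool:
    response = await safety.run_shield(
        shield_id="content-safety",            # hypothetical shield id
        messages=[UserMessage(content=text)],
        params={},
    )
    return response.violation is None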
+ +from typing import Any + +from llama_stack.apis.common.content_types import ( + URL, + InterleavedContent, +) +from llama_stack.apis.tools import ( + ListToolsResponse, + RAGDocument, + RAGQueryConfig, + RAGQueryResult, + RAGToolRuntime, + ToolRuntime, +) +from llama_stack.log import get_logger + +from ..routing_tables.toolgroups import ToolGroupsRoutingTable + +logger = get_logger(name=__name__, category="core") + + +class ToolRuntimeRouter(ToolRuntime): + class RagToolImpl(RAGToolRuntime): + def __init__( + self, + routing_table: ToolGroupsRoutingTable, + ) -> None: + logger.debug("Initializing ToolRuntimeRouter.RagToolImpl") + self.routing_table = routing_table + + async def query( + self, + content: InterleavedContent, + vector_db_ids: list[str], + query_config: RAGQueryConfig | None = None, + ) -> RAGQueryResult: + logger.debug(f"ToolRuntimeRouter.RagToolImpl.query: {vector_db_ids}") + return await self.routing_table.get_provider_impl("knowledge_search").query( + content, vector_db_ids, query_config + ) + + async def insert( + self, + documents: list[RAGDocument], + vector_db_id: str, + chunk_size_in_tokens: int = 512, + ) -> None: + logger.debug( + f"ToolRuntimeRouter.RagToolImpl.insert: {vector_db_id}, {len(documents)} documents, chunk_size={chunk_size_in_tokens}" + ) + return await self.routing_table.get_provider_impl("insert_into_memory").insert( + documents, vector_db_id, chunk_size_in_tokens + ) + + def __init__( + self, + routing_table: ToolGroupsRoutingTable, + ) -> None: + logger.debug("Initializing ToolRuntimeRouter") + self.routing_table = routing_table + + # HACK ALERT this should be in sync with "get_all_api_endpoints()" + self.rag_tool = self.RagToolImpl(routing_table) + for method in ("query", "insert"): + setattr(self, f"rag_tool.{method}", getattr(self.rag_tool, method)) + + async def initialize(self) -> None: + logger.debug("ToolRuntimeRouter.initialize") + pass + + async def shutdown(self) -> None: + logger.debug("ToolRuntimeRouter.shutdown") + pass + + async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> Any: + logger.debug(f"ToolRuntimeRouter.invoke_tool: {tool_name}") + return await self.routing_table.get_provider_impl(tool_name).invoke_tool( + tool_name=tool_name, + kwargs=kwargs, + ) + + async def list_runtime_tools( + self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None + ) -> ListToolsResponse: + logger.debug(f"ToolRuntimeRouter.list_runtime_tools: {tool_group_id}") + return await self.routing_table.list_tools(tool_group_id) diff --git a/llama_stack/distribution/routers/vector_io.py b/llama_stack/distribution/routers/vector_io.py new file mode 100644 index 000000000..8c17aa890 --- /dev/null +++ b/llama_stack/distribution/routers/vector_io.py @@ -0,0 +1,72 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
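# Note on the "HACK ALERT" in ToolRuntimeRouter.__init__ above: the RagToolImpl
# methods are stored under literal dotted attribute names ("rag_tool.query",
# "rag_tool.insert"), presumably so that endpoint resolution by handler name can
# find them. A small sketch of the resulting aliasing (assumes `router` is an
# initialized ToolRuntimeRouter):
def show_rag_tool_aliasing(router) -> None:
    via_dotted_name = getattr(router, "rag_tool.query")  # only reachable via getattr
    via_nested_attribute = router.rag_tool.query         # ordinary attribute access
    assert via_dotted_name == via_nested_attribute       # same bound coroutine method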
+ +from typing import Any + +from llama_stack.apis.common.content_types import ( + InterleavedContent, +) +from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO +from llama_stack.log import get_logger +from llama_stack.providers.datatypes import RoutingTable + +logger = get_logger(name=__name__, category="core") + + +class VectorIORouter(VectorIO): + """Routes to an provider based on the vector db identifier""" + + def __init__( + self, + routing_table: RoutingTable, + ) -> None: + logger.debug("Initializing VectorIORouter") + self.routing_table = routing_table + + async def initialize(self) -> None: + logger.debug("VectorIORouter.initialize") + pass + + async def shutdown(self) -> None: + logger.debug("VectorIORouter.shutdown") + pass + + async def register_vector_db( + self, + vector_db_id: str, + embedding_model: str, + embedding_dimension: int | None = 384, + provider_id: str | None = None, + provider_vector_db_id: str | None = None, + ) -> None: + logger.debug(f"VectorIORouter.register_vector_db: {vector_db_id}, {embedding_model}") + await self.routing_table.register_vector_db( + vector_db_id, + embedding_model, + embedding_dimension, + provider_id, + provider_vector_db_id, + ) + + async def insert_chunks( + self, + vector_db_id: str, + chunks: list[Chunk], + ttl_seconds: int | None = None, + ) -> None: + logger.debug( + f"VectorIORouter.insert_chunks: {vector_db_id}, {len(chunks)} chunks, ttl_seconds={ttl_seconds}, chunk_ids={[chunk.metadata['document_id'] for chunk in chunks[:3]]}{' and more...' if len(chunks) > 3 else ''}", + ) + return await self.routing_table.get_provider_impl(vector_db_id).insert_chunks(vector_db_id, chunks, ttl_seconds) + + async def query_chunks( + self, + vector_db_id: str, + query: InterleavedContent, + params: dict[str, Any] | None = None, + ) -> QueryChunksResponse: + logger.debug(f"VectorIORouter.query_chunks: {vector_db_id}") + return await self.routing_table.get_provider_impl(vector_db_id).query_chunks(vector_db_id, query, params) diff --git a/llama_stack/providers/tests/__init__.py b/llama_stack/distribution/routing_tables/__init__.py similarity index 100% rename from llama_stack/providers/tests/__init__.py rename to llama_stack/distribution/routing_tables/__init__.py diff --git a/llama_stack/distribution/routing_tables/benchmarks.py b/llama_stack/distribution/routing_tables/benchmarks.py new file mode 100644 index 000000000..589a00c02 --- /dev/null +++ b/llama_stack/distribution/routing_tables/benchmarks.py @@ -0,0 +1,58 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
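# Illustrative usage sketch for the VectorIORouter defined above. Assumptions:
# `vector_io` is an initialized VectorIORouter; an embedding model registered under
# the hypothetical id "all-MiniLM-L6-v2" already exists with an "embedding_dimension"
# in its metadata (which the routing table requires); Chunk is assumed to accept
# plain-text content plus a metadata dict carrying "document_id".
from llama_stack.apis.vector_io import Chunk

async def index_and_search(vector_io, texts: list[str], query: str):
    await vector_io.register_vector_db(
        vector_db_id="my-notes",               # hypothetical identifier
        embedding_model="all-MiniLM-L6-v2",
    )
    chunks = [
        Chunk(content=text, metadata={"document_id": f"doc-{i}"})
        for i, text in enumerate(texts)
    ]
    await vector_io.insert_chunks(vector_db_id="my-notes", chunks=chunks)
    return await vector_io.query_chunks(vector_db_id="my-notes", query=query)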
+ +from typing import Any + +from llama_stack.apis.benchmarks import Benchmark, Benchmarks, ListBenchmarksResponse +from llama_stack.distribution.datatypes import ( + BenchmarkWithACL, +) +from llama_stack.log import get_logger + +from .common import CommonRoutingTableImpl + +logger = get_logger(name=__name__, category="core") + + +class BenchmarksRoutingTable(CommonRoutingTableImpl, Benchmarks): + async def list_benchmarks(self) -> ListBenchmarksResponse: + return ListBenchmarksResponse(data=await self.get_all_with_type("benchmark")) + + async def get_benchmark(self, benchmark_id: str) -> Benchmark: + benchmark = await self.get_object_by_identifier("benchmark", benchmark_id) + if benchmark is None: + raise ValueError(f"Benchmark '{benchmark_id}' not found") + return benchmark + + async def register_benchmark( + self, + benchmark_id: str, + dataset_id: str, + scoring_functions: list[str], + metadata: dict[str, Any] | None = None, + provider_benchmark_id: str | None = None, + provider_id: str | None = None, + ) -> None: + if metadata is None: + metadata = {} + if provider_id is None: + if len(self.impls_by_provider_id) == 1: + provider_id = list(self.impls_by_provider_id.keys())[0] + else: + raise ValueError( + "No provider specified and multiple providers available. Please specify a provider_id." + ) + if provider_benchmark_id is None: + provider_benchmark_id = benchmark_id + benchmark = BenchmarkWithACL( + identifier=benchmark_id, + dataset_id=dataset_id, + scoring_functions=scoring_functions, + metadata=metadata, + provider_id=provider_id, + provider_resource_id=provider_benchmark_id, + ) + await self.register_object(benchmark) diff --git a/llama_stack/distribution/routing_tables/common.py b/llama_stack/distribution/routing_tables/common.py new file mode 100644 index 000000000..8ec87ca50 --- /dev/null +++ b/llama_stack/distribution/routing_tables/common.py @@ -0,0 +1,218 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
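# The register_benchmark method above (like the other register_* methods in these
# routing tables) defaults the provider the same way. A standalone restatement of
# that rule, for illustration only; it deliberately ignores the later existence
# check performed by register_object:
def resolve_provider_id(requested: str | None, impls_by_provider_id: dict) -> str:
    if requested is not None:
        return requested
    if len(impls_by_provider_id) == 1:
        return next(iter(impls_by_provider_id))
    raise ValueError(
        "No provider specified and multiple providers available. Please specify a provider_id."
    )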
+ +from typing import Any + +from llama_stack.apis.resource import ResourceType +from llama_stack.apis.scoring_functions import ScoringFn +from llama_stack.distribution.access_control import check_access +from llama_stack.distribution.datatypes import ( + AccessAttributes, + RoutableObject, + RoutableObjectWithProvider, + RoutedProtocol, +) +from llama_stack.distribution.request_headers import get_auth_attributes +from llama_stack.distribution.store import DistributionRegistry +from llama_stack.log import get_logger +from llama_stack.providers.datatypes import Api, RoutingTable + +logger = get_logger(name=__name__, category="core") + + +def get_impl_api(p: Any) -> Api: + return p.__provider_spec__.api + + +# TODO: this should return the registered object for all APIs +async def register_object_with_provider(obj: RoutableObject, p: Any) -> RoutableObject: + api = get_impl_api(p) + + assert obj.provider_id != "remote", "Remote provider should not be registered" + + if api == Api.inference: + return await p.register_model(obj) + elif api == Api.safety: + return await p.register_shield(obj) + elif api == Api.vector_io: + return await p.register_vector_db(obj) + elif api == Api.datasetio: + return await p.register_dataset(obj) + elif api == Api.scoring: + return await p.register_scoring_function(obj) + elif api == Api.eval: + return await p.register_benchmark(obj) + elif api == Api.tool_runtime: + return await p.register_toolgroup(obj) + else: + raise ValueError(f"Unknown API {api} for registering object with provider") + + +async def unregister_object_from_provider(obj: RoutableObject, p: Any) -> None: + api = get_impl_api(p) + if api == Api.vector_io: + return await p.unregister_vector_db(obj.identifier) + elif api == Api.inference: + return await p.unregister_model(obj.identifier) + elif api == Api.datasetio: + return await p.unregister_dataset(obj.identifier) + elif api == Api.tool_runtime: + return await p.unregister_toolgroup(obj.identifier) + else: + raise ValueError(f"Unregister not supported for {api}") + + +Registry = dict[str, list[RoutableObjectWithProvider]] + + +class CommonRoutingTableImpl(RoutingTable): + def __init__( + self, + impls_by_provider_id: dict[str, RoutedProtocol], + dist_registry: DistributionRegistry, + ) -> None: + self.impls_by_provider_id = impls_by_provider_id + self.dist_registry = dist_registry + + async def initialize(self) -> None: + async def add_objects(objs: list[RoutableObjectWithProvider], provider_id: str, cls) -> None: + for obj in objs: + if cls is None: + obj.provider_id = provider_id + else: + # Create a copy of the model data and explicitly set provider_id + model_data = obj.model_dump() + model_data["provider_id"] = provider_id + obj = cls(**model_data) + await self.dist_registry.register(obj) + + # Register all objects from providers + for pid, p in self.impls_by_provider_id.items(): + api = get_impl_api(p) + if api == Api.inference: + p.model_store = self + elif api == Api.safety: + p.shield_store = self + elif api == Api.vector_io: + p.vector_db_store = self + elif api == Api.datasetio: + p.dataset_store = self + elif api == Api.scoring: + p.scoring_function_store = self + scoring_functions = await p.list_scoring_functions() + await add_objects(scoring_functions, pid, ScoringFn) + elif api == Api.eval: + p.benchmark_store = self + elif api == Api.tool_runtime: + p.tool_store = self + + async def shutdown(self) -> None: + for p in self.impls_by_provider_id.values(): + await p.shutdown() + + def get_provider_impl(self, routing_key: str, 
provider_id: str | None = None) -> Any: + from .benchmarks import BenchmarksRoutingTable + from .datasets import DatasetsRoutingTable + from .models import ModelsRoutingTable + from .scoring_functions import ScoringFunctionsRoutingTable + from .shields import ShieldsRoutingTable + from .toolgroups import ToolGroupsRoutingTable + from .vector_dbs import VectorDBsRoutingTable + + def apiname_object(): + if isinstance(self, ModelsRoutingTable): + return ("Inference", "model") + elif isinstance(self, ShieldsRoutingTable): + return ("Safety", "shield") + elif isinstance(self, VectorDBsRoutingTable): + return ("VectorIO", "vector_db") + elif isinstance(self, DatasetsRoutingTable): + return ("DatasetIO", "dataset") + elif isinstance(self, ScoringFunctionsRoutingTable): + return ("Scoring", "scoring_function") + elif isinstance(self, BenchmarksRoutingTable): + return ("Eval", "benchmark") + elif isinstance(self, ToolGroupsRoutingTable): + return ("ToolGroups", "tool_group") + else: + raise ValueError("Unknown routing table type") + + apiname, objtype = apiname_object() + + # Get objects from disk registry + obj = self.dist_registry.get_cached(objtype, routing_key) + if not obj: + provider_ids = list(self.impls_by_provider_id.keys()) + if len(provider_ids) > 1: + provider_ids_str = f"any of the providers: {', '.join(provider_ids)}" + else: + provider_ids_str = f"provider: `{provider_ids[0]}`" + raise ValueError( + f"{objtype.capitalize()} `{routing_key}` not served by {provider_ids_str}. Make sure there is an {apiname} provider serving this {objtype}." + ) + + if not provider_id or provider_id == obj.provider_id: + return self.impls_by_provider_id[obj.provider_id] + + raise ValueError(f"Provider not found for `{routing_key}`") + + async def get_object_by_identifier(self, type: str, identifier: str) -> RoutableObjectWithProvider | None: + # Get from disk registry + obj = await self.dist_registry.get(type, identifier) + if not obj: + return None + + # Check if user has permission to access this object + if not check_access(obj.identifier, getattr(obj, "access_attributes", None), get_auth_attributes()): + logger.debug(f"Access denied to {type} '{identifier}' based on attribute mismatch") + return None + + return obj + + async def unregister_object(self, obj: RoutableObjectWithProvider) -> None: + await self.dist_registry.delete(obj.type, obj.identifier) + await unregister_object_from_provider(obj, self.impls_by_provider_id[obj.provider_id]) + + async def register_object(self, obj: RoutableObjectWithProvider) -> RoutableObjectWithProvider: + # if provider_id is not specified, pick an arbitrary one from existing entries + if not obj.provider_id and len(self.impls_by_provider_id) > 0: + obj.provider_id = list(self.impls_by_provider_id.keys())[0] + + if obj.provider_id not in self.impls_by_provider_id: + raise ValueError(f"Provider `{obj.provider_id}` not found") + + p = self.impls_by_provider_id[obj.provider_id] + + # If object supports access control but no attributes set, use creator's attributes + if not obj.access_attributes: + creator_attributes = get_auth_attributes() + if creator_attributes: + obj.access_attributes = AccessAttributes(**creator_attributes) + logger.info(f"Setting access attributes for {obj.type} '{obj.identifier}' based on creator's identity") + + registered_obj = await register_object_with_provider(obj, p) + # TODO: This needs to be fixed for all APIs once they return the registered object + if obj.type == ResourceType.model.value: + await 
self.dist_registry.register(registered_obj) + return registered_obj + + else: + await self.dist_registry.register(obj) + return obj + + async def get_all_with_type(self, type: str) -> list[RoutableObjectWithProvider]: + objs = await self.dist_registry.get_all() + filtered_objs = [obj for obj in objs if obj.type == type] + + # Apply attribute-based access control filtering + if filtered_objs: + filtered_objs = [ + obj + for obj in filtered_objs + if check_access(obj.identifier, getattr(obj, "access_attributes", None), get_auth_attributes()) + ] + + return filtered_objs diff --git a/llama_stack/distribution/routing_tables/datasets.py b/llama_stack/distribution/routing_tables/datasets.py new file mode 100644 index 000000000..4401ad47e --- /dev/null +++ b/llama_stack/distribution/routing_tables/datasets.py @@ -0,0 +1,93 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import uuid +from typing import Any + +from llama_stack.apis.datasets import ( + Dataset, + DatasetPurpose, + Datasets, + DatasetType, + DataSource, + ListDatasetsResponse, + RowsDataSource, + URIDataSource, +) +from llama_stack.apis.resource import ResourceType +from llama_stack.distribution.datatypes import ( + DatasetWithACL, +) +from llama_stack.log import get_logger + +from .common import CommonRoutingTableImpl + +logger = get_logger(name=__name__, category="core") + + +class DatasetsRoutingTable(CommonRoutingTableImpl, Datasets): + async def list_datasets(self) -> ListDatasetsResponse: + return ListDatasetsResponse(data=await self.get_all_with_type(ResourceType.dataset.value)) + + async def get_dataset(self, dataset_id: str) -> Dataset: + dataset = await self.get_object_by_identifier("dataset", dataset_id) + if dataset is None: + raise ValueError(f"Dataset '{dataset_id}' not found") + return dataset + + async def register_dataset( + self, + purpose: DatasetPurpose, + source: DataSource, + metadata: dict[str, Any] | None = None, + dataset_id: str | None = None, + ) -> Dataset: + if isinstance(source, dict): + if source["type"] == "uri": + source = URIDataSource.parse_obj(source) + elif source["type"] == "rows": + source = RowsDataSource.parse_obj(source) + + if not dataset_id: + dataset_id = f"dataset-{str(uuid.uuid4())}" + + provider_dataset_id = dataset_id + + # infer provider from source + if metadata: + if metadata.get("provider_id"): + provider_id = metadata.get("provider_id") # pass through from nvidia datasetio + elif source.type == DatasetType.rows.value: + provider_id = "localfs" + elif source.type == DatasetType.uri.value: + # infer provider from uri + if source.uri.startswith("huggingface"): + provider_id = "huggingface" + else: + provider_id = "localfs" + else: + raise ValueError(f"Unknown data source type: {source.type}") + + if metadata is None: + metadata = {} + + dataset = DatasetWithACL( + identifier=dataset_id, + provider_resource_id=provider_dataset_id, + provider_id=provider_id, + purpose=purpose, + source=source, + metadata=metadata, + ) + + await self.register_object(dataset) + return dataset + + async def unregister_dataset(self, dataset_id: str) -> None: + dataset = await self.get_dataset(dataset_id) + if dataset is None: + raise ValueError(f"Dataset {dataset_id} not found") + await self.unregister_object(dataset) diff --git a/llama_stack/distribution/routing_tables/models.py b/llama_stack/distribution/routing_tables/models.py new file mode 
100644 index 000000000..7216d9935 --- /dev/null +++ b/llama_stack/distribution/routing_tables/models.py @@ -0,0 +1,82 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import time +from typing import Any + +from llama_stack.apis.models import ListModelsResponse, Model, Models, ModelType, OpenAIListModelsResponse, OpenAIModel +from llama_stack.distribution.datatypes import ( + ModelWithACL, +) +from llama_stack.log import get_logger + +from .common import CommonRoutingTableImpl + +logger = get_logger(name=__name__, category="core") + + +class ModelsRoutingTable(CommonRoutingTableImpl, Models): + async def list_models(self) -> ListModelsResponse: + return ListModelsResponse(data=await self.get_all_with_type("model")) + + async def openai_list_models(self) -> OpenAIListModelsResponse: + models = await self.get_all_with_type("model") + openai_models = [ + OpenAIModel( + id=model.identifier, + object="model", + created=int(time.time()), + owned_by="llama_stack", + ) + for model in models + ] + return OpenAIListModelsResponse(data=openai_models) + + async def get_model(self, model_id: str) -> Model: + model = await self.get_object_by_identifier("model", model_id) + if model is None: + raise ValueError(f"Model '{model_id}' not found") + return model + + async def register_model( + self, + model_id: str, + provider_model_id: str | None = None, + provider_id: str | None = None, + metadata: dict[str, Any] | None = None, + model_type: ModelType | None = None, + ) -> Model: + if provider_model_id is None: + provider_model_id = model_id + if provider_id is None: + # If provider_id not specified, use the only provider if it supports this model + if len(self.impls_by_provider_id) == 1: + provider_id = list(self.impls_by_provider_id.keys())[0] + else: + raise ValueError( + f"No provider specified and multiple providers available. Please specify a provider_id. Available providers: {self.impls_by_provider_id.keys()}" + ) + if metadata is None: + metadata = {} + if model_type is None: + model_type = ModelType.llm + if "embedding_dimension" not in metadata and model_type == ModelType.embedding: + raise ValueError("Embedding model must have an embedding dimension in its metadata") + model = ModelWithACL( + identifier=model_id, + provider_resource_id=provider_model_id, + provider_id=provider_id, + metadata=metadata, + model_type=model_type, + ) + registered_model = await self.register_object(model) + return registered_model + + async def unregister_model(self, model_id: str) -> None: + existing_model = await self.get_model(model_id) + if existing_model is None: + raise ValueError(f"Model {model_id} not found") + await self.unregister_object(existing_model) diff --git a/llama_stack/distribution/routing_tables/scoring_functions.py b/llama_stack/distribution/routing_tables/scoring_functions.py new file mode 100644 index 000000000..d85f64b57 --- /dev/null +++ b/llama_stack/distribution/routing_tables/scoring_functions.py @@ -0,0 +1,62 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
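# Illustrative usage sketch for the ModelsRoutingTable defined above. Assumptions:
# `models` is an initialized ModelsRoutingTable backed by a single inference
# provider, so provider_id can be omitted; the identifiers are hypothetical.
from llama_stack.apis.models import ModelType

async def register_example_models(models):
    # LLMs need no extra metadata; model_type defaults to ModelType.llm.
    await models.register_model(model_id="my-llm")

    # Embedding models must declare their dimension, or register_model raises.
    await models.register_model(
        model_id="my-embedding-model",
        model_type=ModelType.embedding,
        metadata={"embedding_dimension": 384},
    )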
+ +from llama_stack.apis.common.type_system import ParamType +from llama_stack.apis.resource import ResourceType +from llama_stack.apis.scoring_functions import ( + ListScoringFunctionsResponse, + ScoringFn, + ScoringFnParams, + ScoringFunctions, +) +from llama_stack.distribution.datatypes import ( + ScoringFnWithACL, +) +from llama_stack.log import get_logger + +from .common import CommonRoutingTableImpl + +logger = get_logger(name=__name__, category="core") + + +class ScoringFunctionsRoutingTable(CommonRoutingTableImpl, ScoringFunctions): + async def list_scoring_functions(self) -> ListScoringFunctionsResponse: + return ListScoringFunctionsResponse(data=await self.get_all_with_type(ResourceType.scoring_function.value)) + + async def get_scoring_function(self, scoring_fn_id: str) -> ScoringFn: + scoring_fn = await self.get_object_by_identifier("scoring_function", scoring_fn_id) + if scoring_fn is None: + raise ValueError(f"Scoring function '{scoring_fn_id}' not found") + return scoring_fn + + async def register_scoring_function( + self, + scoring_fn_id: str, + description: str, + return_type: ParamType, + provider_scoring_fn_id: str | None = None, + provider_id: str | None = None, + params: ScoringFnParams | None = None, + ) -> None: + if provider_scoring_fn_id is None: + provider_scoring_fn_id = scoring_fn_id + if provider_id is None: + if len(self.impls_by_provider_id) == 1: + provider_id = list(self.impls_by_provider_id.keys())[0] + else: + raise ValueError( + "No provider specified and multiple providers available. Please specify a provider_id." + ) + scoring_fn = ScoringFnWithACL( + identifier=scoring_fn_id, + description=description, + return_type=return_type, + provider_resource_id=provider_scoring_fn_id, + provider_id=provider_id, + params=params, + ) + scoring_fn.provider_id = provider_id + await self.register_object(scoring_fn) diff --git a/llama_stack/distribution/routing_tables/shields.py b/llama_stack/distribution/routing_tables/shields.py new file mode 100644 index 000000000..7f62596c9 --- /dev/null +++ b/llama_stack/distribution/routing_tables/shields.py @@ -0,0 +1,57 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
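Related reviewer note: the provider-inference branches in `DatasetsRoutingTable.register_dataset` (shown a little earlier in this patch) are easy to misread, so the sketch below restates just that logic in isolation. The helper name `infer_dataset_provider` and the sample URI are made up for illustration; the branch behaviour follows the code in the patch.

```python
def infer_dataset_provider(
    source_type: str,
    uri: str | None = None,
    metadata: dict | None = None,
) -> str:
    """Illustrative restatement of the provider inference in register_dataset."""
    if metadata and metadata.get("provider_id"):
        # Explicit provider passed through in metadata (e.g. the nvidia datasetio path).
        return metadata["provider_id"]
    if source_type == "rows":
        # Inline rows are always handled by the local filesystem provider.
        return "localfs"
    if source_type == "uri":
        # URIs starting with "huggingface" go to the huggingface provider, everything else to localfs.
        return "huggingface" if (uri or "").startswith("huggingface") else "localfs"
    raise ValueError(f"Unknown data source type: {source_type}")


print(infer_dataset_provider("uri", uri="huggingface://datasets/example/simpleqa"))  # huggingface
print(infer_dataset_provider("rows"))                                                # localfs
```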
+ +from typing import Any + +from llama_stack.apis.resource import ResourceType +from llama_stack.apis.shields import ListShieldsResponse, Shield, Shields +from llama_stack.distribution.datatypes import ( + ShieldWithACL, +) +from llama_stack.log import get_logger + +from .common import CommonRoutingTableImpl + +logger = get_logger(name=__name__, category="core") + + +class ShieldsRoutingTable(CommonRoutingTableImpl, Shields): + async def list_shields(self) -> ListShieldsResponse: + return ListShieldsResponse(data=await self.get_all_with_type(ResourceType.shield.value)) + + async def get_shield(self, identifier: str) -> Shield: + shield = await self.get_object_by_identifier("shield", identifier) + if shield is None: + raise ValueError(f"Shield '{identifier}' not found") + return shield + + async def register_shield( + self, + shield_id: str, + provider_shield_id: str | None = None, + provider_id: str | None = None, + params: dict[str, Any] | None = None, + ) -> Shield: + if provider_shield_id is None: + provider_shield_id = shield_id + if provider_id is None: + # If provider_id not specified, use the only provider if it supports this shield type + if len(self.impls_by_provider_id) == 1: + provider_id = list(self.impls_by_provider_id.keys())[0] + else: + raise ValueError( + "No provider specified and multiple providers available. Please specify a provider_id." + ) + if params is None: + params = {} + shield = ShieldWithACL( + identifier=shield_id, + provider_resource_id=provider_shield_id, + provider_id=provider_id, + params=params, + ) + await self.register_object(shield) + return shield diff --git a/llama_stack/distribution/routing_tables/toolgroups.py b/llama_stack/distribution/routing_tables/toolgroups.py new file mode 100644 index 000000000..2f7dc3e06 --- /dev/null +++ b/llama_stack/distribution/routing_tables/toolgroups.py @@ -0,0 +1,132 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Any + +from llama_stack.apis.common.content_types import URL +from llama_stack.apis.tools import ListToolGroupsResponse, ListToolsResponse, Tool, ToolGroup, ToolGroups +from llama_stack.distribution.datatypes import ToolGroupWithACL +from llama_stack.log import get_logger + +from .common import CommonRoutingTableImpl + +logger = get_logger(name=__name__, category="core") + + +def parse_toolgroup_from_toolgroup_name_pair(toolgroup_name_with_maybe_tool_name: str) -> str | None: + # handle the funny case like "builtin::rag/knowledge_search" + parts = toolgroup_name_with_maybe_tool_name.split("/") + if len(parts) == 2: + return parts[0] + else: + return None + + +class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups): + toolgroups_to_tools: dict[str, list[Tool]] = {} + tool_to_toolgroup: dict[str, str] = {} + + # overridden + def get_provider_impl(self, routing_key: str, provider_id: str | None = None) -> Any: + # we don't index tools in the registry anymore, but only keep a cache of them by toolgroup_id + # TODO: we may want to invalidate the cache (for a given toolgroup_id) every once in a while? 
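Aside for reviewers: the routing-key handling in `get_provider_impl` leans on `parse_toolgroup_from_toolgroup_name_pair`, whose behaviour is easiest to see with concrete calls. The sketch copies the function as it appears in the patch purely to demonstrate it; the asserts are example inputs.

```python
def parse_toolgroup_from_toolgroup_name_pair(toolgroup_name_with_maybe_tool_name: str) -> str | None:
    # Handle the "<toolgroup>/<tool>" form, e.g. "builtin::rag/knowledge_search".
    parts = toolgroup_name_with_maybe_tool_name.split("/")
    if len(parts) == 2:
        return parts[0]
    return None


assert parse_toolgroup_from_toolgroup_name_pair("builtin::rag/knowledge_search") == "builtin::rag"
assert parse_toolgroup_from_toolgroup_name_pair("builtin::rag") is None  # bare toolgroup id: nothing to strip
```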
+ + toolgroup_id = parse_toolgroup_from_toolgroup_name_pair(routing_key) + if toolgroup_id: + routing_key = toolgroup_id + + if routing_key in self.tool_to_toolgroup: + routing_key = self.tool_to_toolgroup[routing_key] + return super().get_provider_impl(routing_key, provider_id) + + async def list_tools(self, toolgroup_id: str | None = None) -> ListToolsResponse: + if toolgroup_id: + if group_id := parse_toolgroup_from_toolgroup_name_pair(toolgroup_id): + toolgroup_id = group_id + toolgroups = [await self.get_tool_group(toolgroup_id)] + else: + toolgroups = await self.get_all_with_type("tool_group") + + all_tools = [] + for toolgroup in toolgroups: + if toolgroup.identifier not in self.toolgroups_to_tools: + await self._index_tools(toolgroup) + all_tools.extend(self.toolgroups_to_tools[toolgroup.identifier]) + + return ListToolsResponse(data=all_tools) + + async def _index_tools(self, toolgroup: ToolGroup): + provider_impl = super().get_provider_impl(toolgroup.identifier, toolgroup.provider_id) + tooldefs_response = await provider_impl.list_runtime_tools(toolgroup.identifier, toolgroup.mcp_endpoint) + + # TODO: kill this Tool vs ToolDef distinction + tooldefs = tooldefs_response.data + tools = [] + for t in tooldefs: + tools.append( + Tool( + identifier=t.name, + toolgroup_id=toolgroup.identifier, + description=t.description or "", + parameters=t.parameters or [], + metadata=t.metadata, + provider_id=toolgroup.provider_id, + ) + ) + + self.toolgroups_to_tools[toolgroup.identifier] = tools + for tool in tools: + self.tool_to_toolgroup[tool.identifier] = toolgroup.identifier + + async def list_tool_groups(self) -> ListToolGroupsResponse: + return ListToolGroupsResponse(data=await self.get_all_with_type("tool_group")) + + async def get_tool_group(self, toolgroup_id: str) -> ToolGroup: + tool_group = await self.get_object_by_identifier("tool_group", toolgroup_id) + if tool_group is None: + raise ValueError(f"Tool group '{toolgroup_id}' not found") + return tool_group + + async def get_tool(self, tool_name: str) -> Tool: + if tool_name in self.tool_to_toolgroup: + toolgroup_id = self.tool_to_toolgroup[tool_name] + tools = self.toolgroups_to_tools[toolgroup_id] + for tool in tools: + if tool.identifier == tool_name: + return tool + raise ValueError(f"Tool '{tool_name}' not found") + + async def register_tool_group( + self, + toolgroup_id: str, + provider_id: str, + mcp_endpoint: URL | None = None, + args: dict[str, Any] | None = None, + ) -> None: + toolgroup = ToolGroupWithACL( + identifier=toolgroup_id, + provider_id=provider_id, + provider_resource_id=toolgroup_id, + mcp_endpoint=mcp_endpoint, + args=args, + ) + await self.register_object(toolgroup) + + # ideally, indexing of the tools should not be necessary because anyone using + # the tools should first list the tools and then use them. but there are assumptions + # baked in some of the code and tests right now. 
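The two class-level dictionaries used above, `toolgroups_to_tools` and `tool_to_toolgroup`, form a small in-memory index built lazily on first use. A minimal sketch of that lookup pattern, with plain strings in place of `Tool` objects and hypothetical cache contents, might look like this:

```python
# Hypothetical cache contents after indexing one non-MCP toolgroup.
toolgroups_to_tools: dict[str, list[str]] = {
    "builtin::rag": ["knowledge_search", "insert_into_memory"],
}
tool_to_toolgroup: dict[str, str] = {
    tool: group for group, tools in toolgroups_to_tools.items() for tool in tools
}


def get_tool(tool_name: str) -> tuple[str, str]:
    """Resolve a tool name to (toolgroup_id, tool_name), mirroring the lookup in get_tool."""
    if tool_name in tool_to_toolgroup:
        group = tool_to_toolgroup[tool_name]
        if tool_name in toolgroups_to_tools[group]:
            return group, tool_name
    raise ValueError(f"Tool '{tool_name}' not found")


print(get_tool("knowledge_search"))  # ('builtin::rag', 'knowledge_search')
```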
+ if not toolgroup.mcp_endpoint: + await self._index_tools(toolgroup) + return toolgroup + + async def unregister_toolgroup(self, toolgroup_id: str) -> None: + tool_group = await self.get_tool_group(toolgroup_id) + if tool_group is None: + raise ValueError(f"Tool group {toolgroup_id} not found") + await self.unregister_object(tool_group) + + async def shutdown(self) -> None: + pass diff --git a/llama_stack/distribution/routing_tables/vector_dbs.py b/llama_stack/distribution/routing_tables/vector_dbs.py new file mode 100644 index 000000000..dc6c0d0ef --- /dev/null +++ b/llama_stack/distribution/routing_tables/vector_dbs.py @@ -0,0 +1,74 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from pydantic import TypeAdapter + +from llama_stack.apis.models import ModelType +from llama_stack.apis.resource import ResourceType +from llama_stack.apis.vector_dbs import ListVectorDBsResponse, VectorDB, VectorDBs +from llama_stack.distribution.datatypes import ( + VectorDBWithACL, +) +from llama_stack.log import get_logger + +from .common import CommonRoutingTableImpl + +logger = get_logger(name=__name__, category="core") + + +class VectorDBsRoutingTable(CommonRoutingTableImpl, VectorDBs): + async def list_vector_dbs(self) -> ListVectorDBsResponse: + return ListVectorDBsResponse(data=await self.get_all_with_type("vector_db")) + + async def get_vector_db(self, vector_db_id: str) -> VectorDB: + vector_db = await self.get_object_by_identifier("vector_db", vector_db_id) + if vector_db is None: + raise ValueError(f"Vector DB '{vector_db_id}' not found") + return vector_db + + async def register_vector_db( + self, + vector_db_id: str, + embedding_model: str, + embedding_dimension: int | None = 384, + provider_id: str | None = None, + provider_vector_db_id: str | None = None, + ) -> VectorDB: + if provider_vector_db_id is None: + provider_vector_db_id = vector_db_id + if provider_id is None: + if len(self.impls_by_provider_id) > 0: + provider_id = list(self.impls_by_provider_id.keys())[0] + if len(self.impls_by_provider_id) > 1: + logger.warning( + f"No provider specified and multiple providers available. Arbitrarily selected the first provider {provider_id}." + ) + else: + raise ValueError("No provider available. 
Please configure a vector_io provider.") + model = await self.get_object_by_identifier("model", embedding_model) + if model is None: + raise ValueError(f"Model {embedding_model} not found") + if model.model_type != ModelType.embedding: + raise ValueError(f"Model {embedding_model} is not an embedding model") + if "embedding_dimension" not in model.metadata: + raise ValueError(f"Model {embedding_model} does not have an embedding dimension") + vector_db_data = { + "identifier": vector_db_id, + "type": ResourceType.vector_db.value, + "provider_id": provider_id, + "provider_resource_id": provider_vector_db_id, + "embedding_model": embedding_model, + "embedding_dimension": model.metadata["embedding_dimension"], + } + vector_db = TypeAdapter(VectorDBWithACL).validate_python(vector_db_data) + await self.register_object(vector_db) + return vector_db + + async def unregister_vector_db(self, vector_db_id: str) -> None: + existing_vector_db = await self.get_vector_db(vector_db_id) + if existing_vector_db is None: + raise ValueError(f"Vector DB {vector_db_id} not found") + await self.unregister_object(existing_vector_db) diff --git a/llama_stack/distribution/server/auth.py b/llama_stack/distribution/server/auth.py index 52e6a013c..fb26b49a7 100644 --- a/llama_stack/distribution/server/auth.py +++ b/llama_stack/distribution/server/auth.py @@ -5,74 +5,30 @@ # the root directory of this source tree. import json -from typing import Dict, List, Optional -from urllib.parse import parse_qs import httpx -from pydantic import BaseModel, Field -from llama_stack.distribution.datatypes import AccessAttributes +from llama_stack.distribution.datatypes import AuthenticationConfig +from llama_stack.distribution.server.auth_providers import create_auth_provider from llama_stack.log import get_logger logger = get_logger(name=__name__, category="auth") -class AuthRequestContext(BaseModel): - path: str = Field(description="The path of the request being authenticated") - - headers: Dict[str, str] = Field(description="HTTP headers from the original request (excluding Authorization)") - - params: Dict[str, List[str]] = Field( - description="Query parameters from the original request, parsed as dictionary of lists" - ) - - -class AuthRequest(BaseModel): - api_key: str = Field(description="The API key extracted from the Authorization header") - - request: AuthRequestContext = Field(description="Context information about the request being authenticated") - - -class AuthResponse(BaseModel): - """The format of the authentication response from the auth endpoint.""" - - access_attributes: Optional[AccessAttributes] = Field( - default=None, - description=""" - Structured user attributes for attribute-based access control. - - These attributes determine which resources the user can access. - The model provides standard categories like "roles", "teams", "projects", and "namespaces". - Each attribute category contains a list of values that the user has for that category. - During access control checks, these values are compared against resource requirements. - - Example with standard categories: - ```json - { - "roles": ["admin", "data-scientist"], - "teams": ["ml-team"], - "projects": ["llama-3"], - "namespaces": ["research"] - } - ``` - """, - ) - - message: Optional[str] = Field( - default=None, description="Optional message providing additional context about the authentication result." - ) - - class AuthenticationMiddleware: - """Middleware that authenticates requests using an external auth endpoint. 
+ """Middleware that authenticates requests using configured authentication provider. This middleware: 1. Extracts the Bearer token from the Authorization header - 2. Sends it to the configured auth endpoint along with request details - 3. Validates the response and extracts user attributes + 2. Uses the configured auth provider to validate the token + 3. Extracts user attributes from the provider's response 4. Makes these attributes available to the route handlers for access control - Authentication Request Format: + The middleware supports multiple authentication providers through the AuthProvider interface: + - Kubernetes: Validates tokens against the Kubernetes API server + - Custom: Validates tokens against a custom endpoint + + Authentication Request Format for Custom Auth Provider: ```json { "api_key": "the-api-key-extracted-from-auth-header", @@ -105,21 +61,26 @@ class AuthenticationMiddleware: } ``` + Token Validation: + Each provider implements its own token validation logic: + - Kubernetes: Uses TokenReview API to validate service account tokens + - Custom: Sends token to custom endpoint for validation + Attribute-Based Access Control: - The attributes returned by the auth endpoint are used to determine which + The attributes returned by the auth provider are used to determine which resources the user can access. Resources can specify required attributes using the access_attributes field. For a user to access a resource: 1. All attribute categories specified in the resource must be present in the user's attributes 2. For each category, the user must have at least one matching value - If the auth endpoint doesn't return any attributes, the user will only be able to + If the auth provider doesn't return any attributes, the user will only be able to access resources that don't have access_attributes defined. 
""" - def __init__(self, app, auth_endpoint): + def __init__(self, app, auth_config: AuthenticationConfig): self.app = app - self.auth_endpoint = auth_endpoint + self.auth_provider = create_auth_provider(auth_config) async def __call__(self, scope, receive, send): if scope["type"] == "http": @@ -129,66 +90,41 @@ class AuthenticationMiddleware: if not auth_header or not auth_header.startswith("Bearer "): return await self._send_auth_error(send, "Missing or invalid Authorization header") - api_key = auth_header.split("Bearer ", 1)[1] + token = auth_header.split("Bearer ", 1)[1] - path = scope.get("path", "") - request_headers = {k.decode(): v.decode() for k, v in headers.items()} - - # Remove sensitive headers - if "authorization" in request_headers: - del request_headers["authorization"] - - query_string = scope.get("query_string", b"").decode() - params = parse_qs(query_string) - - # Build the auth request model - auth_request = AuthRequest( - api_key=api_key, - request=AuthRequestContext( - path=path, - headers=request_headers, - params=params, - ), - ) - - # Validate with authentication endpoint + # Validate token and get access attributes try: - async with httpx.AsyncClient() as client: - response = await client.post( - self.auth_endpoint, - json=auth_request.model_dump(), - timeout=10.0, # Add a reasonable timeout - ) - if response.status_code != 200: - logger.warning(f"Authentication failed: {response.status_code}") - return await self._send_auth_error(send, "Authentication failed") - - # Parse and validate the auth response - try: - response_data = response.json() - auth_response = AuthResponse(**response_data) - - # Store attributes in request scope for access control - if auth_response.access_attributes: - user_attributes = auth_response.access_attributes.model_dump(exclude_none=True) - else: - logger.warning("No access attributes, setting namespace to api_key by default") - user_attributes = { - "namespaces": [api_key], - } - - scope["user_attributes"] = user_attributes - logger.debug(f"Authentication successful: {len(user_attributes)} attributes") - except Exception: - logger.exception("Error parsing authentication response") - return await self._send_auth_error(send, "Invalid authentication response format") + validation_result = await self.auth_provider.validate_token(token, scope) except httpx.TimeoutException: logger.exception("Authentication request timed out") return await self._send_auth_error(send, "Authentication service timeout") + except ValueError as e: + logger.exception("Error during authentication") + return await self._send_auth_error(send, str(e)) except Exception: logger.exception("Error during authentication") return await self._send_auth_error(send, "Authentication service error") + # Store attributes in request scope for access control + if validation_result.access_attributes: + user_attributes = validation_result.access_attributes.model_dump(exclude_none=True) + else: + logger.warning("No access attributes, setting namespace to token by default") + user_attributes = { + "roles": [token], + } + + # Store the client ID in the request scope so that downstream middleware (like QuotaMiddleware) + # can identify the requester and enforce per-client rate limits. 
+ scope["authenticated_client_id"] = token + + # Store attributes in request scope + scope["user_attributes"] = user_attributes + scope["principal"] = validation_result.principal + logger.debug( + f"Authentication successful: {validation_result.principal} with {len(scope['user_attributes'])} attributes" + ) + return await self.app(scope, receive, send) async def _send_auth_error(self, send, message): diff --git a/llama_stack/distribution/server/auth_providers.py b/llama_stack/distribution/server/auth_providers.py new file mode 100644 index 000000000..723a65b77 --- /dev/null +++ b/llama_stack/distribution/server/auth_providers.py @@ -0,0 +1,376 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import ssl +import time +from abc import ABC, abstractmethod +from asyncio import Lock +from pathlib import Path +from urllib.parse import parse_qs + +import httpx +from jose import jwt +from pydantic import BaseModel, Field, field_validator, model_validator +from typing_extensions import Self + +from llama_stack.distribution.datatypes import AccessAttributes, AuthenticationConfig, AuthProviderType +from llama_stack.log import get_logger + +logger = get_logger(name=__name__, category="auth") + + +class TokenValidationResult(BaseModel): + principal: str | None = Field( + default=None, + description="The principal (username or persistent identifier) of the authenticated user", + ) + access_attributes: AccessAttributes | None = Field( + default=None, + description=""" + Structured user attributes for attribute-based access control. + + These attributes determine which resources the user can access. + The model provides standard categories like "roles", "teams", "projects", and "namespaces". + Each attribute category contains a list of values that the user has for that category. + During access control checks, these values are compared against resource requirements. + + Example with standard categories: + ```json + { + "roles": ["admin", "data-scientist"], + "teams": ["ml-team"], + "projects": ["llama-3"], + "namespaces": ["research"] + } + ``` + """, + ) + + +class AuthResponse(TokenValidationResult): + """The format of the authentication response from the auth endpoint.""" + + message: str | None = Field( + default=None, description="Optional message providing additional context about the authentication result." 
+ ) + + +class AuthRequestContext(BaseModel): + path: str = Field(description="The path of the request being authenticated") + + headers: dict[str, str] = Field(description="HTTP headers from the original request (excluding Authorization)") + + params: dict[str, list[str]] = Field( + description="Query parameters from the original request, parsed as dictionary of lists" + ) + + +class AuthRequest(BaseModel): + api_key: str = Field(description="The API key extracted from the Authorization header") + + request: AuthRequestContext = Field(description="Context information about the request being authenticated") + + +class AuthProvider(ABC): + """Abstract base class for authentication providers.""" + + @abstractmethod + async def validate_token(self, token: str, scope: dict | None = None) -> TokenValidationResult: + """Validate a token and return access attributes.""" + pass + + @abstractmethod + async def close(self): + """Clean up any resources.""" + pass + + +def get_attributes_from_claims(claims: dict[str, str], mapping: dict[str, str]) -> AccessAttributes: + attributes = AccessAttributes() + for claim_key, attribute_key in mapping.items(): + if claim_key not in claims or not hasattr(attributes, attribute_key): + continue + claim = claims[claim_key] + if isinstance(claim, list): + values = claim + else: + values = claim.split() + + current = getattr(attributes, attribute_key) + if current: + current.extend(values) + else: + setattr(attributes, attribute_key, values) + return attributes + + +class OAuth2JWKSConfig(BaseModel): + # The JWKS URI for collecting public keys + uri: str + key_recheck_period: int = Field(default=3600, description="The period to recheck the JWKS URI for key updates") + + +class OAuth2IntrospectionConfig(BaseModel): + url: str + client_id: str + client_secret: str + send_secret_in_body: bool = False + + +class OAuth2TokenAuthProviderConfig(BaseModel): + audience: str = "llama-stack" + verify_tls: bool = True + tls_cafile: Path | None = None + issuer: str | None = Field(default=None, description="The OIDC issuer URL.") + claims_mapping: dict[str, str] = Field( + default_factory=lambda: { + "sub": "roles", + "username": "roles", + "groups": "teams", + "team": "teams", + "project": "projects", + "tenant": "namespaces", + "namespace": "namespaces", + }, + ) + jwks: OAuth2JWKSConfig | None + introspection: OAuth2IntrospectionConfig | None = None + + @classmethod + @field_validator("claims_mapping") + def validate_claims_mapping(cls, v): + for key, value in v.items(): + if not value: + raise ValueError(f"claims_mapping value cannot be empty: {key}") + if value not in AccessAttributes.model_fields: + raise ValueError(f"claims_mapping value is not a valid attribute: {value}") + return v + + @model_validator(mode="after") + def validate_mode(self) -> Self: + if not self.jwks and not self.introspection: + raise ValueError("One of jwks or introspection must be configured") + if self.jwks and self.introspection: + raise ValueError("At present only one of jwks or introspection should be configured") + return self + + +class OAuth2TokenAuthProvider(AuthProvider): + """ + JWT token authentication provider that validates a JWT token and extracts access attributes. + + This should be the standard authentication provider for most use cases. 
+ """ + + def __init__(self, config: OAuth2TokenAuthProviderConfig): + self.config = config + self._jwks_at: float = 0.0 + self._jwks: dict[str, str] = {} + self._jwks_lock = Lock() + + async def validate_token(self, token: str, scope: dict | None = None) -> TokenValidationResult: + if self.config.jwks: + return await self.validate_jwt_token(token, scope) + if self.config.introspection: + return await self.introspect_token(token, scope) + raise ValueError("One of jwks or introspection must be configured") + + async def validate_jwt_token(self, token: str, scope: dict | None = None) -> TokenValidationResult: + """Validate a token using the JWT token.""" + await self._refresh_jwks() + + try: + header = jwt.get_unverified_header(token) + kid = header["kid"] + if kid not in self._jwks: + raise ValueError(f"Unknown key ID: {kid}") + key_data = self._jwks[kid] + algorithm = header.get("alg", "RS256") + claims = jwt.decode( + token, + key_data, + algorithms=[algorithm], + audience=self.config.audience, + issuer=self.config.issuer, + ) + except Exception as exc: + raise ValueError(f"Invalid JWT token: {token}") from exc + + # There are other standard claims, the most relevant of which is `scope`. + # We should incorporate these into the access attributes. + principal = claims["sub"] + access_attributes = get_attributes_from_claims(claims, self.config.claims_mapping) + return TokenValidationResult( + principal=principal, + access_attributes=access_attributes, + ) + + async def introspect_token(self, token: str, scope: dict | None = None) -> TokenValidationResult: + """Validate a token using token introspection as defined by RFC 7662.""" + form = { + "token": token, + } + if self.config.introspection is None: + raise ValueError("Introspection is not configured") + + if self.config.introspection.send_secret_in_body: + form["client_id"] = self.config.introspection.client_id + form["client_secret"] = self.config.introspection.client_secret + auth = None + else: + auth = (self.config.introspection.client_id, self.config.introspection.client_secret) + ssl_ctxt = None + if self.config.tls_cafile: + ssl_ctxt = ssl.create_default_context(cafile=self.config.tls_cafile.as_posix()) + try: + async with httpx.AsyncClient(verify=ssl_ctxt) as client: + response = await client.post( + self.config.introspection.url, + data=form, + auth=auth, + timeout=10.0, # Add a reasonable timeout + ) + if response.status_code != 200: + logger.warning(f"Token introspection failed with status code: {response.status_code}") + raise ValueError(f"Token introspection failed: {response.status_code}") + + fields = response.json() + if not fields["active"]: + raise ValueError("Token not active") + principal = fields["sub"] or fields["username"] + access_attributes = get_attributes_from_claims(fields, self.config.claims_mapping) + return TokenValidationResult( + principal=principal, + access_attributes=access_attributes, + ) + except httpx.TimeoutException: + logger.exception("Token introspection request timed out") + raise + except ValueError: + # Re-raise ValueError exceptions to preserve their message + raise + except Exception as e: + logger.exception("Error during token introspection") + raise ValueError("Token introspection error") from e + + async def close(self): + pass + + async def _refresh_jwks(self) -> None: + """ + Refresh the JWKS cache. + + This is a simple cache that expires after a certain amount of time (defined by `key_recheck_period`). + If the cache is expired, we refresh the JWKS from the JWKS URI. 
+ + Notes: for Kubernetes which doesn't fully implement the OIDC protocol: + * It doesn't have user authentication flows + * It doesn't have refresh tokens + """ + async with self._jwks_lock: + if self.config.jwks is None: + raise ValueError("JWKS is not configured") + if time.time() - self._jwks_at > self.config.jwks.key_recheck_period: + verify = self.config.tls_cafile.as_posix() if self.config.tls_cafile else self.config.verify_tls + async with httpx.AsyncClient(verify=verify) as client: + res = await client.get(self.config.jwks.uri, timeout=5) + res.raise_for_status() + jwks_data = res.json()["keys"] + updated = {} + for k in jwks_data: + kid = k["kid"] + # Store the entire key object as it may be needed for different algorithms + updated[kid] = k + self._jwks = updated + self._jwks_at = time.time() + + +class CustomAuthProviderConfig(BaseModel): + endpoint: str + + +class CustomAuthProvider(AuthProvider): + """Custom authentication provider that uses an external endpoint.""" + + def __init__(self, config: CustomAuthProviderConfig): + self.config = config + self._client = None + + async def validate_token(self, token: str, scope: dict | None = None) -> TokenValidationResult: + """Validate a token using the custom authentication endpoint.""" + if scope is None: + scope = {} + + headers = dict(scope.get("headers", [])) + path = scope.get("path", "") + request_headers = {k.decode(): v.decode() for k, v in headers.items()} + + # Remove sensitive headers + if "authorization" in request_headers: + del request_headers["authorization"] + + query_string = scope.get("query_string", b"").decode() + params = parse_qs(query_string) + + # Build the auth request model + auth_request = AuthRequest( + api_key=token, + request=AuthRequestContext( + path=path, + headers=request_headers, + params=params, + ), + ) + + # Validate with authentication endpoint + try: + async with httpx.AsyncClient() as client: + response = await client.post( + self.config.endpoint, + json=auth_request.model_dump(), + timeout=10.0, # Add a reasonable timeout + ) + if response.status_code != 200: + logger.warning(f"Authentication failed with status code: {response.status_code}") + raise ValueError(f"Authentication failed: {response.status_code}") + + # Parse and validate the auth response + try: + response_data = response.json() + auth_response = AuthResponse(**response_data) + return auth_response + except Exception as e: + logger.exception("Error parsing authentication response") + raise ValueError("Invalid authentication response format") from e + + except httpx.TimeoutException: + logger.exception("Authentication request timed out") + raise + except ValueError: + # Re-raise ValueError exceptions to preserve their message + raise + except Exception as e: + logger.exception("Error during authentication") + raise ValueError("Authentication service error") from e + + async def close(self): + """Close the HTTP client.""" + if self._client: + await self._client.aclose() + self._client = None + + +def create_auth_provider(config: AuthenticationConfig) -> AuthProvider: + """Factory function to create the appropriate auth provider.""" + provider_type = config.provider_type.lower() + + if provider_type == "custom": + return CustomAuthProvider(CustomAuthProviderConfig.model_validate(config.config)) + elif provider_type == "oauth2_token": + return OAuth2TokenAuthProvider(OAuth2TokenAuthProviderConfig.model_validate(config.config)) + else: + supported_providers = ", ".join([t.value for t in AuthProviderType]) + raise 
ValueError(f"Unsupported auth provider type: {provider_type}. Supported types are: {supported_providers}") diff --git a/llama_stack/distribution/server/quota.py b/llama_stack/distribution/server/quota.py new file mode 100644 index 000000000..ddbffae64 --- /dev/null +++ b/llama_stack/distribution/server/quota.py @@ -0,0 +1,110 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import json +import time +from datetime import datetime, timedelta, timezone + +from starlette.types import ASGIApp, Receive, Scope, Send + +from llama_stack.log import get_logger +from llama_stack.providers.utils.kvstore.api import KVStore +from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig +from llama_stack.providers.utils.kvstore.kvstore import kvstore_impl + +logger = get_logger(name=__name__, category="quota") + + +class QuotaMiddleware: + """ + ASGI middleware that enforces separate quotas for authenticated and anonymous clients + within a configurable time window. + + - For authenticated requests, it reads the client ID from the + `Authorization: Bearer ` header. + - For anonymous requests, it falls back to the IP address of the client. + Requests are counted in a KV store (e.g., SQLite), and HTTP 429 is returned + once a client exceeds its quota. + """ + + def __init__( + self, + app: ASGIApp, + kv_config: KVStoreConfig, + anonymous_max_requests: int, + authenticated_max_requests: int, + window_seconds: int = 86400, + ): + self.app = app + self.kv_config = kv_config + self.kv: KVStore | None = None + self.anonymous_max_requests = anonymous_max_requests + self.authenticated_max_requests = authenticated_max_requests + self.window_seconds = window_seconds + + if isinstance(self.kv_config, SqliteKVStoreConfig): + logger.warning( + "QuotaMiddleware: Using SQLite backend. Expiry/TTL is not enforced; cleanup is manual. " + f"window_seconds={self.window_seconds}" + ) + + async def _get_kv(self) -> KVStore: + if self.kv is None: + self.kv = await kvstore_impl(self.kv_config) + return self.kv + + async def __call__(self, scope: Scope, receive: Receive, send: Send): + if scope["type"] == "http": + # pick key & limit based on auth + auth_id = scope.get("authenticated_client_id") + if auth_id: + key_id = auth_id + limit = self.authenticated_max_requests + else: + # fallback to IP + client = scope.get("client") + key_id = client[0] if client else "anonymous" + limit = self.anonymous_max_requests + + current_window = int(time.time() // self.window_seconds) + key = f"quota:{key_id}:{current_window}" + + try: + kv = await self._get_kv() + prev = await kv.get(key) or "0" + count = int(prev) + 1 + + if int(prev) == 0: + # Set with expiration datetime when it is the first request in the window. 
+ expiration = datetime.now(timezone.utc) + timedelta(seconds=self.window_seconds) + await kv.set(key, str(count), expiration=expiration) + else: + await kv.set(key, str(count)) + except Exception: + logger.exception("Failed to access KV store for quota") + return await self._send_error(send, 500, "Quota service error") + + if count > limit: + logger.warning( + "Quota exceeded for client %s: %d/%d", + key_id, + count, + limit, + ) + return await self._send_error(send, 429, "Quota exceeded") + + return await self.app(scope, receive, send) + + async def _send_error(self, send: Send, status: int, message: str): + await send( + { + "type": "http.response.start", + "status": status, + "headers": [[b"content-type", b"application/json"]], + } + ) + body = json.dumps({"error": {"message": message}}).encode() + await send({"type": "http.response.body", "body": body}) diff --git a/llama_stack/distribution/server/endpoints.py b/llama_stack/distribution/server/routes.py similarity index 55% rename from llama_stack/distribution/server/endpoints.py rename to llama_stack/distribution/server/routes.py index 98f01c067..ea66fec5a 100644 --- a/llama_stack/distribution/server/endpoints.py +++ b/llama_stack/distribution/server/routes.py @@ -6,21 +6,23 @@ import inspect import re -from typing import Dict, List +from collections.abc import Callable +from typing import Any -from pydantic import BaseModel +from aiohttp import hdrs +from starlette.routing import Route from llama_stack.apis.tools import RAGToolRuntime, SpecialToolGroup from llama_stack.apis.version import LLAMA_STACK_API_VERSION from llama_stack.distribution.resolver import api_protocol_map from llama_stack.providers.datatypes import Api - -class ApiEndpoint(BaseModel): - route: str - method: str - name: str - descriptive_name: str | None = None +EndpointFunc = Callable[..., Any] +PathParams = dict[str, str] +RouteInfo = tuple[EndpointFunc, str] +PathImpl = dict[str, RouteInfo] +RouteImpls = dict[str, PathImpl] +RouteMatch = tuple[EndpointFunc, PathParams, str] def toolgroup_protocol_map(): @@ -29,13 +31,13 @@ def toolgroup_protocol_map(): } -def get_all_api_endpoints() -> Dict[Api, List[ApiEndpoint]]: +def get_all_api_routes() -> dict[Api, list[Route]]: apis = {} protocols = api_protocol_map() toolgroup_protocols = toolgroup_protocol_map() for api, protocol in protocols.items(): - endpoints = [] + routes = [] protocol_methods = inspect.getmembers(protocol, predicate=inspect.isfunction) # HACK ALERT @@ -52,26 +54,28 @@ def get_all_api_endpoints() -> Dict[Api, List[ApiEndpoint]]: if not hasattr(method, "__webmethod__"): continue - webmethod = method.__webmethod__ - route = f"/{LLAMA_STACK_API_VERSION}/{webmethod.route.lstrip('/')}" - if webmethod.method == "GET": - method = "get" - elif webmethod.method == "DELETE": - method = "delete" + # The __webmethod__ attribute is dynamically added by the @webmethod decorator + # mypy doesn't know about this dynamic attribute, so we ignore the attr-defined error + webmethod = method.__webmethod__ # type: ignore[attr-defined] + path = f"/{LLAMA_STACK_API_VERSION}/{webmethod.route.lstrip('/')}" + if webmethod.method == hdrs.METH_GET: + http_method = hdrs.METH_GET + elif webmethod.method == hdrs.METH_DELETE: + http_method = hdrs.METH_DELETE else: - method = "post" - endpoints.append( - ApiEndpoint(route=route, method=method, name=name, descriptive_name=webmethod.descriptive_name) - ) + http_method = hdrs.METH_POST + routes.append( + Route(path=path, methods=[http_method], name=name, endpoint=None) + ) # setting 
endpoint to None since don't use a Router object - apis[api] = endpoints + apis[api] = routes return apis -def initialize_endpoint_impls(impls): - endpoints = get_all_api_endpoints() - endpoint_impls = {} +def initialize_route_impls(impls: dict[Api, Any]) -> RouteImpls: + routes = get_all_api_routes() + route_impls: RouteImpls = {} def _convert_path_to_regex(path: str) -> str: # Convert {param} to named capture groups @@ -84,29 +88,34 @@ def initialize_endpoint_impls(impls): return f"^{pattern}$" - for api, api_endpoints in endpoints.items(): + for api, api_routes in routes.items(): if api not in impls: continue - for endpoint in api_endpoints: + for route in api_routes: impl = impls[api] - func = getattr(impl, endpoint.name) - if endpoint.method not in endpoint_impls: - endpoint_impls[endpoint.method] = {} - endpoint_impls[endpoint.method][_convert_path_to_regex(endpoint.route)] = ( + func = getattr(impl, route.name) + # Get the first (and typically only) method from the set, filtering out HEAD + available_methods = [m for m in route.methods if m != "HEAD"] + if not available_methods: + continue # Skip if only HEAD method is available + method = available_methods[0].lower() + if method not in route_impls: + route_impls[method] = {} + route_impls[method][_convert_path_to_regex(route.path)] = ( func, - endpoint.descriptive_name or endpoint.route, + route.path, ) - return endpoint_impls + return route_impls -def find_matching_endpoint(method, path, endpoint_impls): +def find_matching_route(method: str, path: str, route_impls: RouteImpls) -> RouteMatch: """Find the matching endpoint implementation for a given method and path. Args: method: HTTP method (GET, POST, etc.) path: URL path to match against - endpoint_impls: A dictionary of endpoint implementations + route_impls: A dictionary of endpoint implementations Returns: A tuple of (endpoint_function, path_params, descriptive_name) @@ -114,7 +123,7 @@ def find_matching_endpoint(method, path, endpoint_impls): Raises: ValueError: If no matching endpoint is found """ - impls = endpoint_impls.get(method.lower()) + impls = route_impls.get(method.lower()) if not impls: raise ValueError(f"No endpoint found for {path}") diff --git a/llama_stack/distribution/server/server.py b/llama_stack/distribution/server/server.py index 7d4ec2a2f..6c88bbfe9 100644 --- a/llama_stack/distribution/server/server.py +++ b/llama_stack/distribution/server/server.py @@ -6,42 +6,49 @@ import argparse import asyncio +import functools import inspect import json import os +import ssl import sys import traceback import warnings +from collections.abc import Callable from contextlib import asynccontextmanager from importlib.metadata import version as parse_version from pathlib import Path -from typing import Any, List, Optional, Union +from typing import Annotated, Any +import rich.pretty import yaml +from aiohttp import hdrs from fastapi import Body, FastAPI, HTTPException, Request from fastapi import Path as FastapiPath from fastapi.exceptions import RequestValidationError +from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import JSONResponse, StreamingResponse +from openai import BadRequestError from pydantic import BaseModel, ValidationError -from typing_extensions import Annotated -from llama_stack.distribution.datatypes import LoggingConfig, StackRunConfig +from llama_stack.distribution.datatypes import AuthenticationRequiredError, LoggingConfig, StackRunConfig from llama_stack.distribution.distribution import builtin_automatically_routed_apis from 
llama_stack.distribution.request_headers import ( PROVIDER_DATA_VAR, request_provider_data_context, ) from llama_stack.distribution.resolver import InvalidProviderError -from llama_stack.distribution.server.endpoints import ( - find_matching_endpoint, - initialize_endpoint_impls, +from llama_stack.distribution.server.routes import ( + find_matching_route, + get_all_api_routes, + initialize_route_impls, ) from llama_stack.distribution.stack import ( construct_stack, - redact_sensitive_fields, replace_env_vars, validate_env_pair, ) +from llama_stack.distribution.utils.config import redact_sensitive_fields from llama_stack.distribution.utils.context import preserve_contexts_async_generator from llama_stack.log import get_logger from llama_stack.providers.datatypes import Api @@ -57,7 +64,7 @@ from llama_stack.providers.utils.telemetry.tracing import ( ) from .auth import AuthenticationMiddleware -from .endpoints import get_all_api_endpoints +from .quota import QuotaMiddleware REPO_ROOT = Path(__file__).parent.parent.parent.parent @@ -90,9 +97,9 @@ async def global_exception_handler(request: Request, exc: Exception): return JSONResponse(status_code=http_exc.status_code, content={"error": {"detail": http_exc.detail}}) -def translate_exception(exc: Exception) -> Union[HTTPException, RequestValidationError]: +def translate_exception(exc: Exception) -> HTTPException | RequestValidationError: if isinstance(exc, ValidationError): - exc = RequestValidationError(exc.raw_errors) + exc = RequestValidationError(exc.errors()) if isinstance(exc, RequestValidationError): return HTTPException( @@ -110,12 +117,16 @@ def translate_exception(exc: Exception) -> Union[HTTPException, RequestValidatio ) elif isinstance(exc, ValueError): return HTTPException(status_code=400, detail=f"Invalid value: {str(exc)}") + elif isinstance(exc, BadRequestError): + return HTTPException(status_code=400, detail=str(exc)) elif isinstance(exc, PermissionError): return HTTPException(status_code=403, detail=f"Permission denied: {str(exc)}") - elif isinstance(exc, TimeoutError): + elif isinstance(exc, asyncio.TimeoutError | TimeoutError): return HTTPException(status_code=504, detail=f"Operation timed out: {str(exc)}") elif isinstance(exc, NotImplementedError): return HTTPException(status_code=501, detail=f"Not implemented: {str(exc)}") + elif isinstance(exc, AuthenticationRequiredError): + return HTTPException(status_code=401, detail=f"Authentication required: {str(exc)}") else: return HTTPException( status_code=500, @@ -137,7 +148,7 @@ async def shutdown(app): await asyncio.wait_for(impl.shutdown(), timeout=5) else: logger.warning("No shutdown method for %s", impl_name) - except asyncio.TimeoutError: + except (asyncio.TimeoutError, TimeoutError): logger.exception("Shutdown timeout for %s ", impl_name, exc_info=True) except (Exception, asyncio.CancelledError) as e: logger.exception("Failed to shutdown %s: %s", impl_name, {e}) @@ -162,14 +173,17 @@ async def maybe_await(value): return value -async def sse_generator(event_gen): +async def sse_generator(event_gen_coroutine): + event_gen = None try: - async for item in await event_gen: + event_gen = await event_gen_coroutine + async for item in event_gen: yield create_sse_event(item) await asyncio.sleep(0.01) except asyncio.CancelledError: logger.info("Generator cancelled") - await event_gen.aclose() + if event_gen: + await event_gen.aclose() except Exception as e: logger.exception("Error in sse_generator") yield create_sse_event( @@ -181,11 +195,31 @@ async def sse_generator(event_gen): 
) -def create_dynamic_typed_route(func: Any, method: str, route: str): - async def endpoint(request: Request, **kwargs): +async def log_request_pre_validation(request: Request): + if request.method in ("POST", "PUT", "PATCH"): + try: + body_bytes = await request.body() + if body_bytes: + try: + parsed_body = json.loads(body_bytes.decode()) + log_output = rich.pretty.pretty_repr(parsed_body) + except (json.JSONDecodeError, UnicodeDecodeError): + log_output = repr(body_bytes) + logger.debug(f"Incoming raw request body for {request.method} {request.url.path}:\n{log_output}") + else: + logger.debug(f"Incoming {request.method} {request.url.path} request with empty body.") + except Exception as e: + logger.warning(f"Could not read or log request body for {request.method} {request.url.path}: {e}") + + +def create_dynamic_typed_route(func: Any, method: str, route: str) -> Callable: + @functools.wraps(func) + async def route_handler(request: Request, **kwargs): # Get auth attributes from the request scope user_attributes = request.scope.get("user_attributes", {}) + await log_request_pre_validation(request) + # Use context manager with both provider data and auth attributes with request_provider_data_context(request.headers, user_attributes): is_streaming = is_streaming_request(func.__name__, request, **kwargs) @@ -220,26 +254,52 @@ def create_dynamic_typed_route(func: Any, method: str, route: str): for param in new_params[1:] ] - endpoint.__signature__ = sig.replace(parameters=new_params) + route_handler.__signature__ = sig.replace(parameters=new_params) - return endpoint + return route_handler class TracingMiddleware: def __init__(self, app, impls): self.app = app self.impls = impls + # FastAPI built-in paths that should bypass custom routing + self.fastapi_paths = ("/docs", "/redoc", "/openapi.json", "/favicon.ico", "/static") async def __call__(self, scope, receive, send): if scope.get("type") == "lifespan": return await self.app(scope, receive, send) path = scope.get("path", "") - if not hasattr(self, "endpoint_impls"): - self.endpoint_impls = initialize_endpoint_impls(self.impls) - _, _, trace_path = find_matching_endpoint(scope.get("method", "GET"), path, self.endpoint_impls) - trace_context = await start_trace(trace_path, {"__location__": "server", "raw_path": path}) + # Check if the path is a FastAPI built-in path + if path.startswith(self.fastapi_paths): + # Pass through to FastAPI's built-in handlers + logger.debug(f"Bypassing custom routing for FastAPI built-in path: {path}") + return await self.app(scope, receive, send) + + if not hasattr(self, "route_impls"): + self.route_impls = initialize_route_impls(self.impls) + + try: + _, _, trace_path = find_matching_route(scope.get("method", hdrs.METH_GET), path, self.route_impls) + except ValueError: + # If no matching endpoint is found, pass through to FastAPI + logger.debug(f"No matching route found for path: {path}, falling back to FastAPI") + return await self.app(scope, receive, send) + + trace_attributes = {"__location__": "server", "raw_path": path} + + # Extract W3C trace context headers and store as trace attributes + headers = dict(scope.get("headers", [])) + traceparent = headers.get(b"traceparent", b"").decode() + if traceparent: + trace_attributes["traceparent"] = traceparent + tracestate = headers.get(b"tracestate", b"").decode() + if tracestate: + trace_attributes["tracestate"] = tracestate + + trace_context = await start_trace(trace_path, trace_attributes) async def send_with_trace_id(message): if message["type"] == 
"http.response.start": @@ -294,7 +354,7 @@ class ClientVersionMiddleware: return await self.app(scope, receive, send) -def main(args: Optional[argparse.Namespace] = None): +def main(args: argparse.Namespace | None = None): """Start the LlamaStack server.""" parser = argparse.ArgumentParser(description="Start the LlamaStack server.") parser.add_argument( @@ -317,22 +377,11 @@ def main(args: Optional[argparse.Namespace] = None): default=int(os.getenv("LLAMA_STACK_PORT", 8321)), help="Port to listen on", ) - parser.add_argument("--disable-ipv6", action="store_true", help="Whether to disable IPv6 support") parser.add_argument( "--env", action="append", help="Environment variables in KEY=value format. Can be specified multiple times.", ) - parser.add_argument( - "--tls-keyfile", - help="Path to TLS key file for HTTPS", - required="--tls-certfile" in sys.argv, - ) - parser.add_argument( - "--tls-certfile", - help="Path to TLS certificate file for HTTPS", - required="--tls-keyfile" in sys.argv, - ) # Determine whether the server args are being passed by the "run" command, if this is the case # the args will be passed as a Namespace object to the main function, otherwise they will be @@ -340,14 +389,6 @@ def main(args: Optional[argparse.Namespace] = None): if args is None: args = parser.parse_args() - # Check for deprecated argument usage - if "--yaml-config" in sys.argv: - warnings.warn( - "The '--yaml-config' argument is deprecated and will be removed in a future version. Use '--config' instead.", - DeprecationWarning, - stacklevel=2, - ) - log_line = "" if args.config: # if the user provided a config file, use it, even if template was specified @@ -361,10 +402,10 @@ def main(args: Optional[argparse.Namespace] = None): raise ValueError(f"Template {args.template} does not exist") log_line = f"Using template {args.template} config file: {config_file}" else: - raise ValueError("Either --yaml-config or --template must be provided") + raise ValueError("Either --config or --template must be provided") logger_config = None - with open(config_file, "r") as fp: + with open(config_file) as fp: config_contents = yaml.safe_load(fp) if isinstance(config_contents, dict) and (cfg := config_contents.get("logging_config")): logger_config = LoggingConfig(**cfg) @@ -388,14 +429,59 @@ def main(args: Optional[argparse.Namespace] = None): safe_config = redact_sensitive_fields(config.model_dump()) logger.info(yaml.dump(safe_config, indent=2)) - app = FastAPI(lifespan=lifespan) + app = FastAPI( + lifespan=lifespan, + docs_url="/docs", + redoc_url="/redoc", + openapi_url="/openapi.json", + ) if not os.environ.get("LLAMA_STACK_DISABLE_VERSION_CHECK"): app.add_middleware(ClientVersionMiddleware) # Add authentication middleware if configured - if config.server.auth and config.server.auth.endpoint: - logger.info(f"Enabling authentication with endpoint: {config.server.auth.endpoint}") - app.add_middleware(AuthenticationMiddleware, auth_endpoint=config.server.auth.endpoint) + if config.server.auth: + logger.info(f"Enabling authentication with provider: {config.server.auth.provider_type.value}") + app.add_middleware(AuthenticationMiddleware, auth_config=config.server.auth) + else: + if config.server.quota: + quota = config.server.quota + logger.warning( + "Configured authenticated_max_requests (%d) but no auth is enabled; " + "falling back to anonymous_max_requests (%d) for all the requests", + quota.authenticated_max_requests, + quota.anonymous_max_requests, + ) + + if config.server.quota: + logger.info("Enabling quota 
middleware for authenticated and anonymous clients") + + quota = config.server.quota + anonymous_max_requests = quota.anonymous_max_requests + # if auth is disabled, use the anonymous max requests + authenticated_max_requests = quota.authenticated_max_requests if config.server.auth else anonymous_max_requests + + kv_config = quota.kvstore + window_map = {"day": 86400} + window_seconds = window_map[quota.period.value] + + app.add_middleware( + QuotaMiddleware, + kv_config=kv_config, + anonymous_max_requests=anonymous_max_requests, + authenticated_max_requests=authenticated_max_requests, + window_seconds=window_seconds, + ) + + # --- CORS middleware for local development --- + # TODO: move to reverse proxy + ui_port = os.environ.get("LLAMA_STACK_UI_PORT", 8322) + app.add_middleware( + CORSMiddleware, + allow_origins=[f"http://localhost:{ui_port}"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], + ) try: impls = asyncio.run(construct_stack(config)) @@ -408,7 +494,7 @@ def main(args: Optional[argparse.Namespace] = None): else: setup_logger(TelemetryAdapter(TelemetryConfig(), {})) - all_endpoints = get_all_api_endpoints() + all_routes = get_all_api_routes() if config.apis: apis_to_serve = set(config.apis) @@ -426,23 +512,29 @@ def main(args: Optional[argparse.Namespace] = None): for api_str in apis_to_serve: api = Api(api_str) - endpoints = all_endpoints[api] + routes = all_routes[api] impl = impls[api] - for endpoint in endpoints: - if not hasattr(impl, endpoint.name): + for route in routes: + if not hasattr(impl, route.name): # ideally this should be a typing violation already - raise ValueError(f"Could not find method {endpoint.name} on {impl}!!") + raise ValueError(f"Could not find method {route.name} on {impl}!") - impl_method = getattr(impl, endpoint.name) + impl_method = getattr(impl, route.name) + # Filter out HEAD method since it's automatically handled by FastAPI for GET routes + available_methods = [m for m in route.methods if m != "HEAD"] + if not available_methods: + raise ValueError(f"No methods found for {route.name} on {impl}") + method = available_methods[0] + logger.debug(f"{method} {route.path}") with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=UserWarning, module="pydantic._internal._fields") - getattr(app, endpoint.method)(endpoint.route, response_model=None)( + getattr(app, method.lower())(route.path, response_model=None)( create_dynamic_typed_route( impl_method, - endpoint.method, - endpoint.route, + method.lower(), + route.path, ) ) @@ -460,21 +552,24 @@ def main(args: Optional[argparse.Namespace] = None): port = args.port or config.server.port ssl_config = None - if args.tls_keyfile: - keyfile = args.tls_keyfile - certfile = args.tls_certfile - else: - keyfile = config.server.tls_keyfile - certfile = config.server.tls_certfile + keyfile = config.server.tls_keyfile + certfile = config.server.tls_certfile if keyfile and certfile: ssl_config = { "ssl_keyfile": keyfile, "ssl_certfile": certfile, } - logger.info(f"HTTPS enabled with certificates:\n Key: {keyfile}\n Cert: {certfile}") + if config.server.tls_cafile: + ssl_config["ssl_ca_certs"] = config.server.tls_cafile + ssl_config["ssl_cert_reqs"] = ssl.CERT_REQUIRED + logger.info( + f"HTTPS enabled with certificates:\n Key: {keyfile}\n Cert: {certfile}\n CA: {config.server.tls_cafile}" + ) + else: + logger.info(f"HTTPS enabled with certificates:\n Key: {keyfile}\n Cert: {certfile}") - listen_host = ["::", "0.0.0.0"] if not args.disable_ipv6 else "0.0.0.0" + listen_host = 
config.server.host or ["::", "0.0.0.0"] logger.info(f"Listening on {listen_host}:{port}") uvicorn_config = { @@ -490,7 +585,7 @@ def main(args: Optional[argparse.Namespace] = None): uvicorn.run(**uvicorn_config) -def extract_path_params(route: str) -> List[str]: +def extract_path_params(route: str) -> list[str]: segments = route.split("/") params = [seg[1:-1] for seg in segments if seg.startswith("{") and seg.endswith("}")] # to handle path params like {param:path} diff --git a/llama_stack/distribution/stack.py b/llama_stack/distribution/stack.py index 08ff5e7cd..fc68dc016 100644 --- a/llama_stack/distribution/stack.py +++ b/llama_stack/distribution/stack.py @@ -8,7 +8,7 @@ import importlib.resources import os import re import tempfile -from typing import Any, Dict, Optional +from typing import Any import yaml @@ -35,6 +35,8 @@ from llama_stack.apis.vector_dbs import VectorDBs from llama_stack.apis.vector_io import VectorIO from llama_stack.distribution.datatypes import Provider, StackRunConfig from llama_stack.distribution.distribution import get_provider_registry +from llama_stack.distribution.inspect import DistributionInspectConfig, DistributionInspectImpl +from llama_stack.distribution.providers import ProviderImpl, ProviderImplConfig from llama_stack.distribution.resolver import ProviderRegistry, resolve_impls from llama_stack.distribution.store.registry import create_dist_registry from llama_stack.distribution.utils.dynamic import instantiate_class_type @@ -88,7 +90,7 @@ RESOURCES = [ ] -async def register_resources(run_config: StackRunConfig, impls: Dict[Api, Any]): +async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]): for rsrc, api, register_method, list_method in RESOURCES: objects = getattr(run_config, rsrc) if api not in impls: @@ -119,26 +121,6 @@ class EnvVarError(Exception): super().__init__(f"Environment variable '{var_name}' not set or empty{f' at {path}' if path else ''}") -def redact_sensitive_fields(data: Dict[str, Any]) -> Dict[str, Any]: - """Redact sensitive information from config before printing.""" - sensitive_patterns = ["api_key", "api_token", "password", "secret"] - - def _redact_dict(d: Dict[str, Any]) -> Dict[str, Any]: - result = {} - for k, v in d.items(): - if isinstance(v, dict): - result[k] = _redact_dict(v) - elif isinstance(v, list): - result[k] = [_redact_dict(i) if isinstance(i, dict) else i for i in v] - elif any(pattern in k.lower() for pattern in sensitive_patterns): - result[k] = "********" - else: - result[k] = v - return result - - return _redact_dict(data) - - def replace_env_vars(config: Any, path: str = "") -> Any: if isinstance(config, dict): result = {} @@ -215,13 +197,37 @@ def validate_env_pair(env_pair: str) -> tuple[str, str]: ) from e +def add_internal_implementations(impls: dict[Api, Any], run_config: StackRunConfig) -> None: + """Add internal implementations (inspect and providers) to the implementations dictionary. + + Args: + impls: Dictionary of API implementations + run_config: Stack run configuration + """ + inspect_impl = DistributionInspectImpl( + DistributionInspectConfig(run_config=run_config), + deps=impls, + ) + impls[Api.inspect] = inspect_impl + + providers_impl = ProviderImpl( + ProviderImplConfig(run_config=run_config), + deps=impls, + ) + impls[Api.providers] = providers_impl + + # Produces a stack of providers for the given run config. Not all APIs may be # asked for in the run config. 
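Note that `redact_sensitive_fields` is removed here but not deleted outright; per the server changes above it is now imported from `llama_stack.distribution.utils.config`. Assuming it keeps the semantics of the removed code (keys containing "api_key", "api_token", "password" or "secret" are masked with "********"), a quick illustration with a made-up config, runnable with this branch of llama_stack installed:

```python
from llama_stack.distribution.utils.config import redact_sensitive_fields

config = {
    "providers": {
        "inference": [
            {"provider_id": "together", "config": {"api_key": "tok-123", "url": "https://api.together.xyz"}}
        ]
    }
}

# The api_key value is masked while non-sensitive keys such as "url" are left untouched.
print(redact_sensitive_fields(config))
```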
async def construct_stack( - run_config: StackRunConfig, provider_registry: Optional[ProviderRegistry] = None -) -> Dict[Api, Any]: + run_config: StackRunConfig, provider_registry: ProviderRegistry | None = None +) -> dict[Api, Any]: dist_registry, _ = await create_dist_registry(run_config.metadata_store, run_config.image_name) impls = await resolve_impls(run_config, provider_registry or get_provider_registry(run_config), dist_registry) + + # Add internal implementations after all other providers are resolved + add_internal_implementations(impls, run_config) + await register_resources(run_config, impls) return impls @@ -238,7 +244,7 @@ def get_stack_run_config_from_template(template: str) -> StackRunConfig: def run_config_from_adhoc_config_spec( - adhoc_config_spec: str, provider_registry: Optional[ProviderRegistry] = None + adhoc_config_spec: str, provider_registry: ProviderRegistry | None = None ) -> StackRunConfig: """ Create an adhoc distribution from a list of API providers. diff --git a/llama_stack/distribution/start_stack.sh b/llama_stack/distribution/start_stack.sh index d3e13c7dc..996935a5e 100755 --- a/llama_stack/distribution/start_stack.sh +++ b/llama_stack/distribution/start_stack.sh @@ -29,7 +29,7 @@ error_handler() { trap 'error_handler ${LINENO}' ERR if [ $# -lt 3 ]; then - echo "Usage: $0 " + echo "Usage: $0 [--config ] [--env KEY=VALUE]..." exit 1 fi @@ -40,37 +40,51 @@ env_path_or_name="$1" container_image="localhost/$env_path_or_name" shift -yaml_config="$1" -shift - port="$1" shift SCRIPT_DIR=$(dirname "$(readlink -f "$0")") source "$SCRIPT_DIR/common.sh" -# Initialize env_vars as an string +# Initialize variables +yaml_config="" env_vars="" other_args="" -# Process environment variables from --env arguments + +# Process remaining arguments while [[ $# -gt 0 ]]; do case "$1" in - --env) - - if [[ -n "$2" ]]; then - env_vars="$env_vars --env $2" - shift 2 - else - echo -e "${RED}Error: --env requires a KEY=VALUE argument${NC}" >&2 - exit 1 - fi - ;; - *) - other_args="$other_args $1" - shift - ;; + --config) + if [[ -n "$2" ]]; then + yaml_config="$2" + shift 2 + else + echo -e "${RED}Error: $1 requires a CONFIG argument${NC}" >&2 + exit 1 + fi + ;; + --env) + if [[ -n "$2" ]]; then + env_vars="$env_vars --env $2" + shift 2 + else + echo -e "${RED}Error: --env requires a KEY=VALUE argument${NC}" >&2 + exit 1 + fi + ;; + *) + other_args="$other_args $1" + shift + ;; esac done + +# Check if yaml_config is required based on env_type +if [[ "$env_type" == "venv" || "$env_type" == "conda" ]] && [ -z "$yaml_config" ]; then + echo -e "${RED}Error: --config is required for venv and conda environments${NC}" >&2 + exit 1 +fi + PYTHON_BINARY="python" case "$env_type" in "venv") @@ -106,8 +120,14 @@ esac if [[ "$env_type" == "venv" || "$env_type" == "conda" ]]; then set -x + if [ -n "$yaml_config" ]; then + yaml_config_arg="--config $yaml_config" + else + yaml_config_arg="" + fi + $PYTHON_BINARY -m llama_stack.distribution.server.server \ - --yaml-config "$yaml_config" \ + $yaml_config_arg \ --port "$port" \ $env_vars \ $other_args @@ -149,15 +169,26 @@ elif [[ "$env_type" == "container" ]]; then version_tag=$(curl -s $URL | jq -r '.info.version') fi - $CONTAINER_BINARY run $CONTAINER_OPTS -it \ + # Build the command with optional yaml config + cmd="$CONTAINER_BINARY run $CONTAINER_OPTS -it \ -p $port:$port \ $env_vars \ - -v "$yaml_config:/app/config.yaml" \ $mounts \ --env LLAMA_STACK_PORT=$port \ --entrypoint python \ $container_image:$version_tag \ - -m 
llama_stack.distribution.server.server \ - --yaml-config /app/config.yaml \ - $other_args + -m llama_stack.distribution.server.server" + + # Add yaml config if provided, otherwise use default + if [ -n "$yaml_config" ]; then + cmd="$cmd -v $yaml_config:/app/run.yaml --config /app/run.yaml" + else + cmd="$cmd --config /app/run.yaml" + fi + + # Add any other args + cmd="$cmd $other_args" + + # Execute the command + eval $cmd fi diff --git a/llama_stack/distribution/store/registry.py b/llama_stack/distribution/store/registry.py index 76b66cc7a..0e84854c2 100644 --- a/llama_stack/distribution/store/registry.py +++ b/llama_stack/distribution/store/registry.py @@ -6,7 +6,7 @@ import asyncio from contextlib import asynccontextmanager -from typing import Dict, List, Optional, Protocol, Tuple +from typing import Protocol import pydantic @@ -20,13 +20,13 @@ logger = get_logger(__name__, category="core") class DistributionRegistry(Protocol): - async def get_all(self) -> List[RoutableObjectWithProvider]: ... + async def get_all(self) -> list[RoutableObjectWithProvider]: ... async def initialize(self) -> None: ... - async def get(self, identifier: str) -> Optional[RoutableObjectWithProvider]: ... + async def get(self, identifier: str) -> RoutableObjectWithProvider | None: ... - def get_cached(self, identifier: str) -> Optional[RoutableObjectWithProvider]: ... + def get_cached(self, identifier: str) -> RoutableObjectWithProvider | None: ... async def update(self, obj: RoutableObjectWithProvider) -> RoutableObjectWithProvider: ... @@ -36,17 +36,17 @@ class DistributionRegistry(Protocol): REGISTER_PREFIX = "distributions:registry" -KEY_VERSION = "v8" +KEY_VERSION = "v9" KEY_FORMAT = f"{REGISTER_PREFIX}:{KEY_VERSION}::" + "{type}:{identifier}" -def _get_registry_key_range() -> Tuple[str, str]: +def _get_registry_key_range() -> tuple[str, str]: """Returns the start and end keys for the registry range query.""" start_key = f"{REGISTER_PREFIX}:{KEY_VERSION}" return start_key, f"{start_key}\xff" -def _parse_registry_values(values: List[str]) -> List[RoutableObjectWithProvider]: +def _parse_registry_values(values: list[str]) -> list[RoutableObjectWithProvider]: """Utility function to parse registry values into RoutableObjectWithProvider objects.""" all_objects = [] for value in values: @@ -67,16 +67,16 @@ class DiskDistributionRegistry(DistributionRegistry): async def initialize(self) -> None: pass - def get_cached(self, type: str, identifier: str) -> Optional[RoutableObjectWithProvider]: + def get_cached(self, type: str, identifier: str) -> RoutableObjectWithProvider | None: # Disk registry does not have a cache raise NotImplementedError("Disk registry does not have a cache") - async def get_all(self) -> List[RoutableObjectWithProvider]: + async def get_all(self) -> list[RoutableObjectWithProvider]: start_key, end_key = _get_registry_key_range() - values = await self.kvstore.range(start_key, end_key) + values = await self.kvstore.values_in_range(start_key, end_key) return _parse_registry_values(values) - async def get(self, type: str, identifier: str) -> Optional[RoutableObjectWithProvider]: + async def get(self, type: str, identifier: str) -> RoutableObjectWithProvider | None: json_str = await self.kvstore.get(KEY_FORMAT.format(type=type, identifier=identifier)) if not json_str: return None @@ -113,7 +113,7 @@ class DiskDistributionRegistry(DistributionRegistry): class CachedDiskDistributionRegistry(DiskDistributionRegistry): def __init__(self, kvstore: KVStore): super().__init__(kvstore) - self.cache: 
Dict[Tuple[str, str], RoutableObjectWithProvider] = {} + self.cache: dict[tuple[str, str], RoutableObjectWithProvider] = {} self._initialized = False self._initialize_lock = asyncio.Lock() self._cache_lock = asyncio.Lock() @@ -134,7 +134,7 @@ class CachedDiskDistributionRegistry(DiskDistributionRegistry): return start_key, end_key = _get_registry_key_range() - values = await self.kvstore.range(start_key, end_key) + values = await self.kvstore.values_in_range(start_key, end_key) objects = _parse_registry_values(values) async with self._locked_cache() as cache: @@ -147,15 +147,15 @@ class CachedDiskDistributionRegistry(DiskDistributionRegistry): async def initialize(self) -> None: await self._ensure_initialized() - def get_cached(self, type: str, identifier: str) -> Optional[RoutableObjectWithProvider]: + def get_cached(self, type: str, identifier: str) -> RoutableObjectWithProvider | None: return self.cache.get((type, identifier), None) - async def get_all(self) -> List[RoutableObjectWithProvider]: + async def get_all(self) -> list[RoutableObjectWithProvider]: await self._ensure_initialized() async with self._locked_cache() as cache: return list(cache.values()) - async def get(self, type: str, identifier: str) -> Optional[RoutableObjectWithProvider]: + async def get(self, type: str, identifier: str) -> RoutableObjectWithProvider | None: await self._ensure_initialized() cache_key = (type, identifier) @@ -189,7 +189,7 @@ class CachedDiskDistributionRegistry(DiskDistributionRegistry): async def create_dist_registry( - metadata_store: Optional[KVStoreConfig], + metadata_store: KVStoreConfig | None, image_name: str, ) -> tuple[CachedDiskDistributionRegistry, KVStore]: # instantiate kvstore for storing and retrieving distribution metadata diff --git a/llama_stack/distribution/ui/Containerfile b/llama_stack/distribution/ui/Containerfile index 0126d1867..5d2dc933b 100644 --- a/llama_stack/distribution/ui/Containerfile +++ b/llama_stack/distribution/ui/Containerfile @@ -5,7 +5,8 @@ FROM python:3.12-slim WORKDIR /app COPY . 
/app/ RUN /usr/local/bin/python -m pip install --upgrade pip && \ - /usr/local/bin/pip3 install -r requirements.txt + /usr/local/bin/pip3 install -r requirements.txt && \ + /usr/local/bin/pip3 install -r llama_stack/distribution/ui/requirements.txt EXPOSE 8501 -ENTRYPOINT ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"] +ENTRYPOINT ["streamlit", "run", "llama_stack/distribution/ui/app.py", "--server.port=8501", "--server.address=0.0.0.0"] diff --git a/llama_stack/distribution/ui/README.md b/llama_stack/distribution/ui/README.md index 51c2d2bc2..0e96690ec 100644 --- a/llama_stack/distribution/ui/README.md +++ b/llama_stack/distribution/ui/README.md @@ -48,3 +48,6 @@ uv run --with ".[ui]" streamlit run llama_stack/distribution/ui/app.py | TOGETHER_API_KEY | API key for Together provider | (empty string) | | SAMBANOVA_API_KEY | API key for SambaNova provider | (empty string) | | OPENAI_API_KEY | API key for OpenAI provider | (empty string) | +| KEYCLOAK_URL | URL for keycloak authentication | (empty string) | +| KEYCLOAK_REALM | Keycloak realm | default | +| KEYCLOAK_CLIENT_ID | Client ID for keycloak auth | (empty string) | \ No newline at end of file diff --git a/llama_stack/distribution/ui/app.py b/llama_stack/distribution/ui/app.py index 441f65d20..a9a28b445 100644 --- a/llama_stack/distribution/ui/app.py +++ b/llama_stack/distribution/ui/app.py @@ -50,6 +50,42 @@ def main(): ) pg.run() +def main2(): + from dataclasses import asdict + st.subheader(f"Welcome {keycloak.user_info['preferred_username']}!") + st.write(f"Here is your user information:") + st.write(asdict(keycloak)) + +def get_access_token() -> str|None: + return st.session_state.get('access_token') if __name__ == "__main__": - main() + + from streamlit_keycloak import login + import os + + keycloak_url = os.environ.get("KEYCLOAK_URL") + keycloak_realm = os.environ.get("KEYCLOAK_REALM", "default") + keycloak_client_id = os.environ.get("KEYCLOAK_CLIENT_ID") + + if keycloak_url and keycloak_client_id: + keycloak = login( + url=keycloak_url, + realm=keycloak_realm, + client_id=keycloak_client_id, + custom_labels={ + "labelButton": "Sign in to kvant", + "labelLogin": "Please sign in to your kvant account.", + "errorNoPopup": "Unable to open the authentication popup. Allow popups and refresh the page to proceed.", + "errorPopupClosed": "Authentication popup was closed manually.", + "errorFatal": "Unable to connect to Keycloak using the current configuration." + }, + auto_refresh=True, + ) + + if keycloak.authenticated: + st.session_state['access_token'] = keycloak.access_token + main() + # TBD - add other authentications + else: + main() diff --git a/llama_stack/distribution/ui/modules/api.py b/llama_stack/distribution/ui/modules/api.py index d5395c5b9..a426e59ba 100644 --- a/llama_stack/distribution/ui/modules/api.py +++ b/llama_stack/distribution/ui/modules/api.py @@ -5,14 +5,15 @@ # the root directory of this source tree. 
import os -from typing import Optional from llama_stack_client import LlamaStackClient +from llama_stack.distribution.ui.app import get_access_token class LlamaStackApi: def __init__(self): self.client = LlamaStackClient( + api_key=get_access_token(), base_url=os.environ.get("LLAMA_STACK_ENDPOINT", "http://localhost:8321"), provider_data={ "fireworks_api_key": os.environ.get("FIREWORKS_API_KEY", ""), @@ -23,11 +24,9 @@ class LlamaStackApi: }, ) - def run_scoring(self, row, scoring_function_ids: list[str], scoring_params: Optional[dict]): + def run_scoring(self, row, scoring_function_ids: list[str], scoring_params: dict | None): """Run scoring on a single row""" if not scoring_params: scoring_params = {fn_id: None for fn_id in scoring_function_ids} return self.client.scoring.score(input_rows=[row], scoring_functions=scoring_params) - -llama_stack_api = LlamaStackApi() diff --git a/llama_stack/distribution/ui/page/distribution/datasets.py b/llama_stack/distribution/ui/page/distribution/datasets.py index 6842b29a7..89f645ca8 100644 --- a/llama_stack/distribution/ui/page/distribution/datasets.py +++ b/llama_stack/distribution/ui/page/distribution/datasets.py @@ -6,13 +6,13 @@ import streamlit as st -from llama_stack.distribution.ui.modules.api import llama_stack_api +from llama_stack.distribution.ui.modules.api import LlamaStackApi def datasets(): st.header("Datasets") - datasets_info = {d.identifier: d.to_dict() for d in llama_stack_api.client.datasets.list()} + datasets_info = {d.identifier: d.to_dict() for d in LlamaStackApi().client.datasets.list()} if len(datasets_info) > 0: selected_dataset = st.selectbox("Select a dataset", list(datasets_info.keys())) st.json(datasets_info[selected_dataset], expanded=True) diff --git a/llama_stack/distribution/ui/page/distribution/eval_tasks.py b/llama_stack/distribution/ui/page/distribution/eval_tasks.py index 492be4700..2b70f9202 100644 --- a/llama_stack/distribution/ui/page/distribution/eval_tasks.py +++ b/llama_stack/distribution/ui/page/distribution/eval_tasks.py @@ -6,14 +6,14 @@ import streamlit as st -from llama_stack.distribution.ui.modules.api import llama_stack_api +from llama_stack.distribution.ui.modules.api import LlamaStackApi def benchmarks(): # Benchmarks Section st.header("Benchmarks") - benchmarks_info = {d.identifier: d.to_dict() for d in llama_stack_api.client.benchmarks.list()} + benchmarks_info = {d.identifier: d.to_dict() for d in LlamaStackApi().client.benchmarks.list()} if len(benchmarks_info) > 0: selected_benchmark = st.selectbox("Select an eval task", list(benchmarks_info.keys()), key="benchmark_inspect") diff --git a/llama_stack/distribution/ui/page/distribution/models.py b/llama_stack/distribution/ui/page/distribution/models.py index f29459098..3b96f179f 100644 --- a/llama_stack/distribution/ui/page/distribution/models.py +++ b/llama_stack/distribution/ui/page/distribution/models.py @@ -6,13 +6,13 @@ import streamlit as st -from llama_stack.distribution.ui.modules.api import llama_stack_api +from llama_stack.distribution.ui.modules.api import LlamaStackApi def models(): # Models Section st.header("Models") - models_info = {m.identifier: m.to_dict() for m in llama_stack_api.client.models.list()} + models_info = {m.identifier: m.to_dict() for m in LlamaStackApi().client.models.list()} selected_model = st.selectbox("Select a model", list(models_info.keys())) st.json(models_info[selected_model]) diff --git a/llama_stack/distribution/ui/page/distribution/providers.py b/llama_stack/distribution/ui/page/distribution/providers.py 
index c660cb986..116237b13 100644 --- a/llama_stack/distribution/ui/page/distribution/providers.py +++ b/llama_stack/distribution/ui/page/distribution/providers.py @@ -6,12 +6,12 @@ import streamlit as st -from llama_stack.distribution.ui.modules.api import llama_stack_api +from llama_stack.distribution.ui.modules.api import LlamaStackApi def providers(): st.header("🔍 API Providers") - apis_providers_lst = llama_stack_api.client.providers.list() + apis_providers_lst = LlamaStackApi().client.providers.list() api_to_providers = {} for api_provider in apis_providers_lst: if api_provider.api in api_to_providers: diff --git a/llama_stack/distribution/ui/page/distribution/scoring_functions.py b/llama_stack/distribution/ui/page/distribution/scoring_functions.py index 193146356..3c3428f44 100644 --- a/llama_stack/distribution/ui/page/distribution/scoring_functions.py +++ b/llama_stack/distribution/ui/page/distribution/scoring_functions.py @@ -6,13 +6,13 @@ import streamlit as st -from llama_stack.distribution.ui.modules.api import llama_stack_api +from llama_stack.distribution.ui.modules.api import LlamaStackApi def scoring_functions(): st.header("Scoring Functions") - scoring_functions_info = {s.identifier: s.to_dict() for s in llama_stack_api.client.scoring_functions.list()} + scoring_functions_info = {s.identifier: s.to_dict() for s in LlamaStackApi().client.scoring_functions.list()} selected_scoring_function = st.selectbox("Select a scoring function", list(scoring_functions_info.keys())) st.json(scoring_functions_info[selected_scoring_function], expanded=True) diff --git a/llama_stack/distribution/ui/page/distribution/shields.py b/llama_stack/distribution/ui/page/distribution/shields.py index 67d66d64f..84b583980 100644 --- a/llama_stack/distribution/ui/page/distribution/shields.py +++ b/llama_stack/distribution/ui/page/distribution/shields.py @@ -6,14 +6,14 @@ import streamlit as st -from llama_stack.distribution.ui.modules.api import llama_stack_api +from llama_stack.distribution.ui.modules.api import LlamaStackApi def shields(): # Shields Section st.header("Shields") - shields_info = {s.identifier: s.to_dict() for s in llama_stack_api.client.shields.list()} + shields_info = {s.identifier: s.to_dict() for s in LlamaStackApi().client.shields.list()} selected_shield = st.selectbox("Select a shield", list(shields_info.keys())) st.json(shields_info[selected_shield]) diff --git a/llama_stack/distribution/ui/page/distribution/vector_dbs.py b/llama_stack/distribution/ui/page/distribution/vector_dbs.py index 49a4f25bb..e7eb7b13b 100644 --- a/llama_stack/distribution/ui/page/distribution/vector_dbs.py +++ b/llama_stack/distribution/ui/page/distribution/vector_dbs.py @@ -6,12 +6,12 @@ import streamlit as st -from llama_stack.distribution.ui.modules.api import llama_stack_api +from llama_stack.distribution.ui.modules.api import LlamaStackApi def vector_dbs(): st.header("Vector Databases") - vector_dbs_info = {v.identifier: v.to_dict() for v in llama_stack_api.client.vector_dbs.list()} + vector_dbs_info = {v.identifier: v.to_dict() for v in LlamaStackApi().client.vector_dbs.list()} if len(vector_dbs_info) > 0: selected_vector_db = st.selectbox("Select a vector database", list(vector_dbs_info.keys())) diff --git a/llama_stack/distribution/ui/page/evaluations/app_eval.py b/llama_stack/distribution/ui/page/evaluations/app_eval.py index d7bc6388c..13da6071e 100644 --- a/llama_stack/distribution/ui/page/evaluations/app_eval.py +++ b/llama_stack/distribution/ui/page/evaluations/app_eval.py @@ -9,7 +9,7 @@ 
import json import pandas as pd import streamlit as st -from llama_stack.distribution.ui.modules.api import llama_stack_api +from llama_stack.distribution.ui.modules.api import LlamaStackApi from llama_stack.distribution.ui.modules.utils import process_dataset @@ -39,7 +39,7 @@ def application_evaluation_page(): # Select Scoring Functions to Run Evaluation On st.subheader("Select Scoring Functions") - scoring_functions = llama_stack_api.client.scoring_functions.list() + scoring_functions = LlamaStackApi().client.scoring_functions.list() scoring_functions = {sf.identifier: sf for sf in scoring_functions} scoring_functions_names = list(scoring_functions.keys()) selected_scoring_functions = st.multiselect( @@ -48,7 +48,7 @@ def application_evaluation_page(): help="Choose one or more scoring functions.", ) - available_models = llama_stack_api.client.models.list() + available_models = LlamaStackApi().client.models.list() available_models = [m.identifier for m in available_models] scoring_params = {} @@ -108,7 +108,7 @@ def application_evaluation_page(): progress_bar.progress(progress, text=progress_text) # Run evaluation for current row - score_res = llama_stack_api.run_scoring( + score_res = LlamaStackApi().run_scoring( r, scoring_function_ids=selected_scoring_functions, scoring_params=scoring_params, diff --git a/llama_stack/distribution/ui/page/evaluations/native_eval.py b/llama_stack/distribution/ui/page/evaluations/native_eval.py index 97f875e17..133c3b151 100644 --- a/llama_stack/distribution/ui/page/evaluations/native_eval.py +++ b/llama_stack/distribution/ui/page/evaluations/native_eval.py @@ -9,13 +9,13 @@ import json import pandas as pd import streamlit as st -from llama_stack.distribution.ui.modules.api import llama_stack_api +from llama_stack.distribution.ui.modules.api import LlamaStackApi def select_benchmark_1(): # Select Benchmarks st.subheader("1. 
Choose An Eval Task") - benchmarks = llama_stack_api.client.benchmarks.list() + benchmarks = LlamaStackApi().client.benchmarks.list() benchmarks = {et.identifier: et for et in benchmarks} benchmarks_names = list(benchmarks.keys()) selected_benchmark = st.selectbox( @@ -47,7 +47,7 @@ def define_eval_candidate_2(): # Define Eval Candidate candidate_type = st.radio("Candidate Type", ["model", "agent"]) - available_models = llama_stack_api.client.models.list() + available_models = LlamaStackApi().client.models.list() available_models = [model.identifier for model in available_models] selected_model = st.selectbox( "Choose a model", @@ -167,7 +167,7 @@ def run_evaluation_3(): eval_candidate = st.session_state["eval_candidate"] dataset_id = benchmarks[selected_benchmark].dataset_id - rows = llama_stack_api.client.datasets.iterrows( + rows = LlamaStackApi().client.datasets.iterrows( dataset_id=dataset_id, ) total_rows = len(rows.data) @@ -208,7 +208,7 @@ def run_evaluation_3(): progress = i / len(rows) progress_bar.progress(progress, text=progress_text) # Run evaluation for current row - eval_res = llama_stack_api.client.eval.evaluate_rows( + eval_res = LlamaStackApi().client.eval.evaluate_rows( benchmark_id=selected_benchmark, input_rows=[r], scoring_functions=benchmarks[selected_benchmark].scoring_functions, diff --git a/llama_stack/distribution/ui/page/playground/chat.py b/llama_stack/distribution/ui/page/playground/chat.py index 8e7345169..053ae42de 100644 --- a/llama_stack/distribution/ui/page/playground/chat.py +++ b/llama_stack/distribution/ui/page/playground/chat.py @@ -6,12 +6,12 @@ import streamlit as st -from llama_stack.distribution.ui.modules.api import llama_stack_api +from llama_stack.distribution.ui.modules.api import LlamaStackApi # Sidebar configurations with st.sidebar: st.header("Configuration") - available_models = llama_stack_api.client.models.list() + available_models = LlamaStackApi().client.models.list() available_models = [model.identifier for model in available_models if model.model_type == "llm"] selected_model = st.selectbox( "Choose a model", @@ -103,7 +103,7 @@ if prompt := st.chat_input("Example: What is Llama Stack?"): else: strategy = {"type": "greedy"} - response = llama_stack_api.client.inference.chat_completion( + response = LlamaStackApi().client.inference.chat_completion( messages=[ {"role": "system", "content": system_prompt}, {"role": "user", "content": prompt}, @@ -124,7 +124,7 @@ if prompt := st.chat_input("Example: What is Llama Stack?"): message_placeholder.markdown(full_response + "▌") message_placeholder.markdown(full_response) else: - full_response = response - message_placeholder.markdown(full_response.completion_message.content) + full_response = response.completion_message.content + message_placeholder.markdown(full_response) st.session_state.messages.append({"role": "assistant", "content": full_response}) diff --git a/llama_stack/distribution/ui/page/playground/rag.py b/llama_stack/distribution/ui/page/playground/rag.py index 392c9afe2..94e27a255 100644 --- a/llama_stack/distribution/ui/page/playground/rag.py +++ b/llama_stack/distribution/ui/page/playground/rag.py @@ -10,7 +10,7 @@ import streamlit as st from llama_stack_client import Agent, AgentEventLogger, RAGDocument from llama_stack.apis.common.content_types import ToolCallDelta -from llama_stack.distribution.ui.modules.api import llama_stack_api +from llama_stack.distribution.ui.modules.api import LlamaStackApi from llama_stack.distribution.ui.modules.utils import data_url_from_file @@ 
-24,6 +24,13 @@ def rag_chat_page(): def should_disable_input(): return "displayed_messages" in st.session_state and len(st.session_state.displayed_messages) > 0 + def log_message(message): + with st.chat_message(message["role"]): + if "tool_output" in message and message["tool_output"]: + with st.expander(label="Tool Output", expanded=False, icon="🛠"): + st.write(message["tool_output"]) + st.markdown(message["content"]) + with st.sidebar: # File/Directory Upload Section st.subheader("Upload Documents", divider=True) @@ -50,14 +57,14 @@ def rag_chat_page(): for i, uploaded_file in enumerate(uploaded_files) ] - providers = llama_stack_api.client.providers.list() + providers = LlamaStackApi().client.providers.list() vector_io_provider = None for x in providers: if x.api == "vector_io": vector_io_provider = x.provider_id - llama_stack_api.client.vector_dbs.register( + LlamaStackApi().client.vector_dbs.register( vector_db_id=vector_db_name, # Use the user-provided name embedding_dimension=384, embedding_model="all-MiniLM-L6-v2", @@ -65,7 +72,7 @@ def rag_chat_page(): ) # insert documents using the custom vector db name - llama_stack_api.client.tool_runtime.rag_tool.insert( + LlamaStackApi().client.tool_runtime.rag_tool.insert( vector_db_id=vector_db_name, # Use the user-provided name documents=documents, chunk_size_in_tokens=512, @@ -86,7 +93,7 @@ def rag_chat_page(): ) # select memory banks - vector_dbs = llama_stack_api.client.vector_dbs.list() + vector_dbs = LlamaStackApi().client.vector_dbs.list() vector_dbs = [vector_db.identifier for vector_db in vector_dbs] selected_vector_dbs = st.multiselect( label="Select Document Collections to use in RAG queries", @@ -96,7 +103,7 @@ def rag_chat_page(): ) st.subheader("Inference Parameters", divider=True) - available_models = llama_stack_api.client.models.list() + available_models = LlamaStackApi().client.models.list() available_models = [model.identifier for model in available_models if model.model_type == "llm"] selected_model = st.selectbox( label="Choose a model", @@ -146,8 +153,7 @@ def rag_chat_page(): # Display chat history for message in st.session_state.displayed_messages: - with st.chat_message(message["role"]): - st.markdown(message["content"]) + log_message(message) if temperature > 0.0: strategy = { @@ -161,7 +167,7 @@ def rag_chat_page(): @st.cache_resource def create_agent(): return Agent( - llama_stack_api.client, + LlamaStackApi().client, model=selected_model, instructions=system_prompt, sampling_params={ @@ -201,7 +207,7 @@ def rag_chat_page(): # Display assistant response with st.chat_message("assistant"): - retrieval_message_placeholder = st.empty() + retrieval_message_placeholder = st.expander(label="Tool Output", expanded=False, icon="🛠") message_placeholder = st.empty() full_response = "" retrieval_response = "" @@ -209,14 +215,16 @@ def rag_chat_page(): log.print() if log.role == "tool_execution": retrieval_response += log.content.replace("====", "").strip() - retrieval_message_placeholder.info(retrieval_response) + retrieval_message_placeholder.write(retrieval_response) else: full_response += log.content message_placeholder.markdown(full_response + "▌") message_placeholder.markdown(full_response) st.session_state.messages.append({"role": "assistant", "content": full_response}) - st.session_state.displayed_messages.append({"role": "assistant", "content": full_response}) + st.session_state.displayed_messages.append( + {"role": "assistant", "content": full_response, "tool_output": retrieval_response} + ) def 
direct_process_prompt(prompt): # Add the system prompt in the beginning of the conversation @@ -224,27 +232,26 @@ def rag_chat_page(): st.session_state.messages.append({"role": "system", "content": system_prompt}) # Query the vector DB - rag_response = llama_stack_api.client.tool_runtime.rag_tool.query( + rag_response = LlamaStackApi().client.tool_runtime.rag_tool.query( content=prompt, vector_db_ids=list(selected_vector_dbs) ) prompt_context = rag_response.content with st.chat_message("assistant"): + with st.expander(label="Retrieval Output", expanded=False): + st.write(prompt_context) + retrieval_message_placeholder = st.empty() message_placeholder = st.empty() full_response = "" retrieval_response = "" - # Display the retrieved content - retrieval_response += str(prompt_context) - retrieval_message_placeholder.info(retrieval_response) - # Construct the extended prompt extended_prompt = f"Please answer the following query using the context below.\n\nCONTEXT:\n{prompt_context}\n\nQUERY:\n{prompt}" # Run inference directly st.session_state.messages.append({"role": "user", "content": extended_prompt}) - response = llama_stack_api.client.inference.chat_completion( + response = LlamaStackApi().client.inference.chat_completion( messages=st.session_state.messages, model_id=selected_model, sampling_params={ diff --git a/llama_stack/distribution/ui/page/playground/tools.py b/llama_stack/distribution/ui/page/playground/tools.py index e987f617b..570bfb366 100644 --- a/llama_stack/distribution/ui/page/playground/tools.py +++ b/llama_stack/distribution/ui/page/playground/tools.py @@ -4,18 +4,27 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +import enum +import json import uuid import streamlit as st from llama_stack_client import Agent +from llama_stack_client.lib.agents.react.agent import ReActAgent +from llama_stack_client.lib.agents.react.tool_parser import ReActOutput -from llama_stack.distribution.ui.modules.api import llama_stack_api +from llama_stack.distribution.ui.modules.api import LlamaStackApi + + +class AgentType(enum.Enum): + REGULAR = "Regular" + REACT = "ReAct" def tool_chat_page(): st.title("🛠 Tools") - client = llama_stack_api.client + client = LlamaStackApi().client models = client.models.list() model_list = [model.identifier for model in models if model.api_model_type == "llm"] @@ -23,50 +32,121 @@ def tool_chat_page(): tool_groups_list = [tool_group.identifier for tool_group in tool_groups] mcp_tools_list = [tool for tool in tool_groups_list if tool.startswith("mcp::")] builtin_tools_list = [tool for tool in tool_groups_list if not tool.startswith("mcp::")] + selected_vector_dbs = [] def reset_agent(): st.session_state.clear() st.cache_resource.clear() with st.sidebar: + st.title("Configuration") st.subheader("Model") - model = st.selectbox(label="models", options=model_list, on_change=reset_agent) + model = st.selectbox(label="Model", options=model_list, on_change=reset_agent, label_visibility="collapsed") + + st.subheader("Available ToolGroups") - st.subheader("Builtin Tools") toolgroup_selection = st.pills( - label="Available ToolGroups", options=builtin_tools_list, selection_mode="multi", on_change=reset_agent + label="Built-in tools", + options=builtin_tools_list, + selection_mode="multi", + on_change=reset_agent, + format_func=lambda tool: "".join(tool.split("::")[1:]), + help="List of built-in tools from your llama stack server.", ) - st.subheader("MCP Servers") + if "builtin::rag" in 
toolgroup_selection: + vector_dbs = LlamaStackApi().client.vector_dbs.list() or [] + if not vector_dbs: + st.info("No vector databases available for selection.") + vector_dbs = [vector_db.identifier for vector_db in vector_dbs] + selected_vector_dbs = st.multiselect( + label="Select Document Collections to use in RAG queries", + options=vector_dbs, + on_change=reset_agent, + ) + mcp_selection = st.pills( - label="Available MCP Servers", options=mcp_tools_list, selection_mode="multi", on_change=reset_agent + label="MCP Servers", + options=mcp_tools_list, + selection_mode="multi", + on_change=reset_agent, + format_func=lambda tool: "".join(tool.split("::")[1:]), + help="List of MCP servers registered to your llama stack server.", ) toolgroup_selection.extend(mcp_selection) - active_tool_list = [] - for toolgroup_id in toolgroup_selection: - active_tool_list.extend( - [ - f"{''.join(toolgroup_id.split('::')[1:])}:{t.identifier}" - for t in client.tools.list(toolgroup_id=toolgroup_id) - ] - ) + grouped_tools = {} + total_tools = 0 - st.subheader(f"Active Tools: 🛠 {len(active_tool_list)}") - st.json(active_tool_list) + for toolgroup_id in toolgroup_selection: + tools = client.tools.list(toolgroup_id=toolgroup_id) + grouped_tools[toolgroup_id] = [tool.identifier for tool in tools] + total_tools += len(tools) + + st.markdown(f"Active Tools: 🛠 {total_tools}") + + for group_id, tools in grouped_tools.items(): + with st.expander(f"🔧 Tools from `{group_id}`"): + for idx, tool in enumerate(tools, start=1): + st.markdown(f"{idx}. `{tool.split(':')[-1]}`") + + st.subheader("Agent Configurations") + st.subheader("Agent Type") + agent_type = st.radio( + label="Select Agent Type", + options=["Regular", "ReAct"], + on_change=reset_agent, + ) + + if agent_type == "ReAct": + agent_type = AgentType.REACT + else: + agent_type = AgentType.REGULAR + + max_tokens = st.slider( + "Max Tokens", + min_value=0, + max_value=4096, + value=512, + step=64, + help="The maximum number of tokens to generate", + on_change=reset_agent, + ) + + for i, tool_name in enumerate(toolgroup_selection): + if tool_name == "builtin::rag": + tool_dict = dict( + name="builtin::rag", + args={ + "vector_db_ids": list(selected_vector_dbs), + }, + ) + toolgroup_selection[i] = tool_dict @st.cache_resource def create_agent(): - return Agent( - client, - model=model, - instructions="You are a helpful assistant. When you use a tool always respond with a summary of the result.", - tools=toolgroup_selection, - sampling_params={ - "strategy": {"type": "greedy"}, - }, - ) + if "agent_type" in st.session_state and st.session_state.agent_type == AgentType.REACT: + return ReActAgent( + client=client, + model=model, + tools=toolgroup_selection, + response_format={ + "type": "json_schema", + "json_schema": ReActOutput.model_json_schema(), + }, + sampling_params={"strategy": {"type": "greedy"}, "max_tokens": max_tokens}, + ) + else: + return Agent( + client, + model=model, + instructions="You are a helpful assistant. 
When you use a tool always respond with a summary of the result.", + tools=toolgroup_selection, + sampling_params={"strategy": {"type": "greedy"}, "max_tokens": max_tokens}, + ) + + st.session_state.agent_type = agent_type agent = create_agent() @@ -95,6 +175,158 @@ def tool_chat_page(): ) def response_generator(turn_response): + if st.session_state.get("agent_type") == AgentType.REACT: + return _handle_react_response(turn_response) + else: + return _handle_regular_response(turn_response) + + def _handle_react_response(turn_response): + current_step_content = "" + final_answer = None + tool_results = [] + + for response in turn_response: + if not hasattr(response.event, "payload"): + yield ( + "\n\n🚨 :red[_Llama Stack server Error:_]\n" + "The response received is missing an expected `payload` attribute.\n" + "This could indicate a malformed response or an internal issue within the server.\n\n" + f"Error details: {response}" + ) + return + + payload = response.event.payload + + if payload.event_type == "step_progress" and hasattr(payload.delta, "text"): + current_step_content += payload.delta.text + continue + + if payload.event_type == "step_complete": + step_details = payload.step_details + + if step_details.step_type == "inference": + yield from _process_inference_step(current_step_content, tool_results, final_answer) + current_step_content = "" + elif step_details.step_type == "tool_execution": + tool_results = _process_tool_execution(step_details, tool_results) + current_step_content = "" + else: + current_step_content = "" + + if not final_answer and tool_results: + yield from _format_tool_results_summary(tool_results) + + def _process_inference_step(current_step_content, tool_results, final_answer): + try: + react_output_data = json.loads(current_step_content) + thought = react_output_data.get("thought") + action = react_output_data.get("action") + answer = react_output_data.get("answer") + + if answer and answer != "null" and answer is not None: + final_answer = answer + + if thought: + with st.expander("🤔 Thinking...", expanded=False): + st.markdown(f":grey[__{thought}__]") + + if action and isinstance(action, dict): + tool_name = action.get("tool_name") + tool_params = action.get("tool_params") + with st.expander(f'🛠 Action: Using tool "{tool_name}"', expanded=False): + st.json(tool_params) + + if answer and answer != "null" and answer is not None: + yield f"\n\n✅ **Final Answer:**\n{answer}" + + except json.JSONDecodeError: + yield f"\n\nFailed to parse ReAct step content:\n```json\n{current_step_content}\n```" + except Exception as e: + yield f"\n\nFailed to process ReAct step: {e}\n```json\n{current_step_content}\n```" + + return final_answer + + def _process_tool_execution(step_details, tool_results): + try: + if hasattr(step_details, "tool_responses") and step_details.tool_responses: + for tool_response in step_details.tool_responses: + tool_name = tool_response.tool_name + content = tool_response.content + tool_results.append((tool_name, content)) + with st.expander(f'⚙️ Observation (Result from "{tool_name}")', expanded=False): + try: + parsed_content = json.loads(content) + st.json(parsed_content) + except json.JSONDecodeError: + st.code(content, language=None) + else: + with st.expander("⚙️ Observation", expanded=False): + st.markdown(":grey[_Tool execution step completed, but no response data found._]") + except Exception as e: + with st.expander("⚙️ Error in Tool Execution", expanded=False): + st.markdown(f":red[_Error processing tool execution: {str(e)}_]") + + return 
tool_results + + def _format_tool_results_summary(tool_results): + yield "\n\n**Here's what I found:**\n" + for tool_name, content in tool_results: + try: + parsed_content = json.loads(content) + + if tool_name == "web_search" and "top_k" in parsed_content: + yield from _format_web_search_results(parsed_content) + elif "results" in parsed_content and isinstance(parsed_content["results"], list): + yield from _format_results_list(parsed_content["results"]) + elif isinstance(parsed_content, dict) and len(parsed_content) > 0: + yield from _format_dict_results(parsed_content) + elif isinstance(parsed_content, list) and len(parsed_content) > 0: + yield from _format_list_results(parsed_content) + except json.JSONDecodeError: + yield f"\n**{tool_name}** was used but returned complex data. Check the observation for details.\n" + except (TypeError, AttributeError, KeyError, IndexError) as e: + print(f"Error processing {tool_name} result: {type(e).__name__}: {e}") + + def _format_web_search_results(parsed_content): + for i, result in enumerate(parsed_content["top_k"], 1): + if i <= 3: + title = result.get("title", "Untitled") + url = result.get("url", "") + content_text = result.get("content", "").strip() + yield f"\n- **{title}**\n {content_text}\n [Source]({url})\n" + + def _format_results_list(results): + for i, result in enumerate(results, 1): + if i <= 3: + if isinstance(result, dict): + name = result.get("name", result.get("title", "Result " + str(i))) + description = result.get("description", result.get("content", result.get("summary", ""))) + yield f"\n- **{name}**\n {description}\n" + else: + yield f"\n- {result}\n" + + def _format_dict_results(parsed_content): + yield "\n```\n" + for key, value in list(parsed_content.items())[:5]: + if isinstance(value, str) and len(value) < 100: + yield f"{key}: {value}\n" + else: + yield f"{key}: [Complex data]\n" + yield "```\n" + + def _format_list_results(parsed_content): + yield "\n" + for _, item in enumerate(parsed_content[:3], 1): + if isinstance(item, str): + yield f"- {item}\n" + elif isinstance(item, dict) and "text" in item: + yield f"- {item['text']}\n" + elif isinstance(item, dict) and len(item) > 0: + first_value = next(iter(item.values())) + if isinstance(first_value, str) and len(first_value) < 100: + yield f"- {first_value}\n" + + def _handle_regular_response(turn_response): for response in turn_response: if hasattr(response.event, "payload"): print(response.event.payload) @@ -103,14 +335,18 @@ def tool_chat_page(): yield response.event.payload.delta.text if response.event.payload.event_type == "step_complete": if response.event.payload.step_details.step_type == "tool_execution": - yield " 🛠 " + if response.event.payload.step_details.tool_calls: + tool_name = str(response.event.payload.step_details.tool_calls[0].tool_name) + yield f'\n\n🛠 :grey[_Using "{tool_name}" tool:_]\n\n' + else: + yield "No tool_calls present in step_details" else: yield f"Error occurred in the Llama Stack Cluster: {response}" with st.chat_message("assistant"): - response = st.write_stream(response_generator(turn_response)) + response_content = st.write_stream(response_generator(turn_response)) - st.session_state.messages.append({"role": "assistant", "content": response}) + st.session_state.messages.append({"role": "assistant", "content": response_content}) tool_chat_page() diff --git a/llama_stack/distribution/ui/requirements.txt b/llama_stack/distribution/ui/requirements.txt index 61d42768d..862f969d6 100644 --- a/llama_stack/distribution/ui/requirements.txt +++ 
b/llama_stack/distribution/ui/requirements.txt @@ -1,5 +1,5 @@ -streamlit +llama-stack-client>=0.2.9 pandas -llama-stack-client>=0.2.1 +streamlit streamlit-option-menu -llama-stack>=0.2.1 +streamlit-keycloak diff --git a/llama_stack/distribution/utils/config.py b/llama_stack/distribution/utils/config.py new file mode 100644 index 000000000..dece52460 --- /dev/null +++ b/llama_stack/distribution/utils/config.py @@ -0,0 +1,30 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Any + + +def redact_sensitive_fields(data: dict[str, Any]) -> dict[str, Any]: + """Redact sensitive information from config before printing.""" + sensitive_patterns = ["api_key", "api_token", "password", "secret"] + + def _redact_value(v: Any) -> Any: + if isinstance(v, dict): + return _redact_dict(v) + elif isinstance(v, list): + return [_redact_value(i) for i in v] + return v + + def _redact_dict(d: dict[str, Any]) -> dict[str, Any]: + result = {} + for k, v in d.items(): + if any(pattern in k.lower() for pattern in sensitive_patterns): + result[k] = "********" + else: + result[k] = _redact_value(v) + return result + + return _redact_dict(data) diff --git a/llama_stack/distribution/utils/config_dirs.py b/llama_stack/distribution/utils/config_dirs.py index 9b9a7ceb3..c3e520f28 100644 --- a/llama_stack/distribution/utils/config_dirs.py +++ b/llama_stack/distribution/utils/config_dirs.py @@ -14,3 +14,5 @@ DISTRIBS_BASE_DIR = LLAMA_STACK_CONFIG_DIR / "distributions" DEFAULT_CHECKPOINT_DIR = LLAMA_STACK_CONFIG_DIR / "checkpoints" RUNTIME_BASE_DIR = LLAMA_STACK_CONFIG_DIR / "runtime" + +EXTERNAL_PROVIDERS_DIR = LLAMA_STACK_CONFIG_DIR / "providers.d" diff --git a/llama_stack/distribution/utils/context.py b/llama_stack/distribution/utils/context.py index c34079ac6..3fcd3315f 100644 --- a/llama_stack/distribution/utils/context.py +++ b/llama_stack/distribution/utils/context.py @@ -4,14 +4,15 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +from collections.abc import AsyncGenerator from contextvars import ContextVar -from typing import AsyncGenerator, List, TypeVar +from typing import TypeVar T = TypeVar("T") def preserve_contexts_async_generator( - gen: AsyncGenerator[T, None], context_vars: List[ContextVar] + gen: AsyncGenerator[T, None], context_vars: list[ContextVar] ) -> AsyncGenerator[T, None]: """ Wraps an async generator to preserve context variables across iterations. 
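Note on the `preserve_contexts_async_generator` hunk above: the change only modernizes its annotations, but the helper's purpose (carrying `ContextVar` values across the await boundaries of a streamed response) is easiest to see with a small usage sketch. The sketch below is illustrative only: the `request_id` variable and `stream_chunks()` generator are hypothetical, and only the import path and the `(gen, context_vars)` signature come from the diff.

```python
import asyncio
from contextvars import ContextVar

from llama_stack.distribution.utils.context import preserve_contexts_async_generator

# Hypothetical request-scoped variable; in the server this would be set per request.
request_id: ContextVar[str] = ContextVar("request_id", default="unset")


async def stream_chunks():
    # Each yielded chunk should still observe the caller's request_id, even if the
    # consumer iterates the generator from a different task or context.
    for i in range(3):
        yield f"chunk-{i} request={request_id.get()}"


async def demo() -> None:
    request_id.set("abc-123")
    wrapped = preserve_contexts_async_generator(stream_chunks(), [request_id])
    async for chunk in wrapped:
        print(chunk)


if __name__ == "__main__":
    asyncio.run(demo())
```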
diff --git a/llama_stack/distribution/utils/exec.py b/llama_stack/distribution/utils/exec.py index 3bf3c81ce..7c2e00524 100644 --- a/llama_stack/distribution/utils/exec.py +++ b/llama_stack/distribution/utils/exec.py @@ -8,6 +8,7 @@ import logging import os import signal import subprocess +import sys from termcolor import cprint @@ -22,8 +23,10 @@ from llama_stack.distribution.utils.image_types import LlamaStackImageType def formulate_run_args(image_type, image_name, config, template_name) -> list: env_name = "" - if image_type == LlamaStackImageType.CONTAINER.value or config.container_image: - env_name = f"distribution-{template_name}" if template_name else config.container_image + if image_type == LlamaStackImageType.CONTAINER.value: + env_name = ( + f"distribution-{template_name}" if template_name else (config.container_image if config else image_name) + ) elif image_type == LlamaStackImageType.CONDA.value: current_conda_env = os.environ.get("CONDA_DEFAULT_ENV") env_name = image_name or current_conda_env @@ -31,6 +34,7 @@ def formulate_run_args(image_type, image_name, config, template_name) -> list: cprint( "No current conda environment detected, please specify a conda environment name with --image-name", color="red", + file=sys.stderr, ) return @@ -47,12 +51,13 @@ def formulate_run_args(image_type, image_name, config, template_name) -> list: return envpath return None - print(f"Using conda environment: {env_name}") + cprint(f"Using conda environment: {env_name}", color="green", file=sys.stderr) conda_prefix = get_conda_prefix(env_name) if not conda_prefix: cprint( f"Conda environment {env_name} does not exist.", color="red", + file=sys.stderr, ) return @@ -61,6 +66,7 @@ def formulate_run_args(image_type, image_name, config, template_name) -> list: cprint( f"Build file {build_file} does not exist.\n\nPlease run `llama stack build` or specify the correct conda environment name with --image-name", color="red", + file=sys.stderr, ) return else: @@ -71,9 +77,10 @@ def formulate_run_args(image_type, image_name, config, template_name) -> list: cprint( "No current virtual environment detected, please specify a virtual environment name with --image-name", color="red", + file=sys.stderr, ) return - print(f"Using virtual environment: {env_name}") + cprint(f"Using virtual environment: {env_name}", file=sys.stderr) script = importlib.resources.files("llama_stack") / "distribution/start_stack.sh" run_args = [ diff --git a/llama_stack/distribution/utils/prompt_for_config.py b/llama_stack/distribution/utils/prompt_for_config.py index 9b2b99022..26f6920e0 100644 --- a/llama_stack/distribution/utils/prompt_for_config.py +++ b/llama_stack/distribution/utils/prompt_for_config.py @@ -8,12 +8,11 @@ import inspect import json import logging from enum import Enum -from typing import Any, List, Literal, Optional, Type, Union, get_args, get_origin +from typing import Annotated, Any, Literal, Union, get_args, get_origin from pydantic import BaseModel from pydantic.fields import FieldInfo from pydantic_core import PydanticUndefinedType -from typing_extensions import Annotated log = logging.getLogger(__name__) @@ -21,7 +20,7 @@ log = logging.getLogger(__name__) def is_list_of_primitives(field_type): """Check if a field type is a List of primitive types.""" origin = get_origin(field_type) - if origin is List or origin is list: + if origin is list: args = get_args(field_type) if len(args) == 1 and args[0] in (int, float, str, bool): return True @@ -53,7 +52,7 @@ def get_non_none_type(field_type):
return next(arg for arg in get_args(field_type) if arg is not type(None)) -def manually_validate_field(model: Type[BaseModel], field_name: str, value: Any): +def manually_validate_field(model: type[BaseModel], field_name: str, value: Any): validators = model.__pydantic_decorators__.field_validators for _name, validator in validators.items(): if field_name in validator.info.fields: @@ -126,7 +125,7 @@ def prompt_for_discriminated_union( # # doesn't support List[nested_class] yet or Dicts of any kind. needs a bunch of # unit tests for coverage. -def prompt_for_config(config_type: type[BaseModel], existing_config: Optional[BaseModel] = None) -> BaseModel: +def prompt_for_config(config_type: type[BaseModel], existing_config: BaseModel | None = None) -> BaseModel: """ Recursively prompt the user for configuration values based on a Pydantic BaseModel. diff --git a/llama_stack/log.py b/llama_stack/log.py index 3835b74a1..f4184710a 100644 --- a/llama_stack/log.py +++ b/llama_stack/log.py @@ -6,8 +6,8 @@ import logging import os +import sys from logging.config import dictConfig -from typing import Dict, Optional from rich.console import Console from rich.errors import MarkupError @@ -33,7 +33,7 @@ CATEGORIES = [ ] # Initialize category levels with default level -_category_levels: Dict[str, int] = {category: DEFAULT_LOG_LEVEL for category in CATEGORIES} +_category_levels: dict[str, int] = {category: DEFAULT_LOG_LEVEL for category in CATEGORIES} def config_to_category_levels(category: str, level: str): @@ -49,7 +49,7 @@ def config_to_category_levels(category: str, level: str): Dict[str, int]: A dictionary mapping categories to their log levels. """ - category_levels: Dict[str, int] = {} + category_levels: dict[str, int] = {} level_value = logging._nameToLevel.get(str(level).upper()) if level_value is None: logging.warning(f"Unknown log level '{level}' for category '{category}'. Falling back to default 'INFO'.") @@ -69,7 +69,7 @@ def config_to_category_levels(category: str, level: str): return category_levels -def parse_yaml_config(yaml_config: LoggingConfig) -> Dict[str, int]: +def parse_yaml_config(yaml_config: LoggingConfig) -> dict[str, int]: """ Helper function to parse a yaml logging configuration found in the run.yaml @@ -86,7 +86,7 @@ def parse_yaml_config(yaml_config: LoggingConfig) -> dict[str, int]: return category_levels -def parse_environment_config(env_config: str) -> Dict[str, int]: +def parse_environment_config(env_config: str) -> dict[str, int]: """ Parse the LLAMA_STACK_LOGGING environment variable and return a dictionary of category log levels. @@ -131,7 +131,7 @@ class CustomRichHandler(RichHandler): self.markup = original_markup -def setup_logging(category_levels: Dict[str, int], log_file: str | None) -> None: +def setup_logging(category_levels: dict[str, int], log_file: str | None) -> None: """ Configure logging based on the provided category log levels and an optional log file. @@ -211,7 +211,7 @@ def setup_logging(category_levels: dict[str, int], log_file: str | None) -> None def get_logger( - name: str, category: str = "uncategorized", config: Optional[LoggingConfig] | None = None + name: str, category: str = "uncategorized", config: LoggingConfig | None = None ) -> logging.LoggerAdapter: """ Returns a logger with the specified name and category.
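For readers skimming the logging changes, a short hedged sketch of how the `get_logger` API reads after this hunk; the `category="core"` value mirrors usage elsewhere in this diff, and the log message is invented for illustration.

```python
from llama_stack.log import get_logger

# Category-scoped LoggerAdapter, as typed in the get_logger() signature above.
logger = get_logger(__name__, category="core")
logger.info("stack components resolved")

# Per-category verbosity is driven by the LLAMA_STACK_LOGGING environment variable,
# which parse_environment_config() above turns into a dict[str, int]; the exact
# key/value syntax is not shown in this hunk, so it is not reproduced here.
```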
@@ -235,7 +235,7 @@ def get_logger( env_config = os.environ.get("LLAMA_STACK_LOGGING", "") if env_config: - cprint(f"Environment variable LLAMA_STACK_LOGGING found: {env_config}", "yellow") + cprint(f"Environment variable LLAMA_STACK_LOGGING found: {env_config}", color="yellow", file=sys.stderr) _category_levels.update(parse_environment_config(env_config)) log_file = os.environ.get("LLAMA_STACK_LOG_FILE") diff --git a/llama_stack/models/llama/checkpoint.py b/llama_stack/models/llama/checkpoint.py index 2bae08a69..c9e0030e3 100644 --- a/llama_stack/models/llama/checkpoint.py +++ b/llama_stack/models/llama/checkpoint.py @@ -7,14 +7,14 @@ import concurrent.futures import re from pathlib import Path -from typing import Any, Dict, List, Optional, Union +from typing import Any import numpy as np import torch from fairscale.nn.model_parallel.initialize import get_model_parallel_rank, get_model_parallel_world_size -def map_mp_rank(old_mp_size: int, new_mp_size: int, new_mp_rank: int) -> List[int]: +def map_mp_rank(old_mp_size: int, new_mp_size: int, new_mp_rank: int) -> list[int]: """Map a new MP rank to a list of old MP ranks given a change in MP size.""" if new_mp_size % old_mp_size == 0: # Read old MP shard and split it into smaller ones @@ -31,12 +31,12 @@ def map_mp_rank(old_mp_size: int, new_mp_size: int, new_mp_rank: int) -> List[in def maybe_reshard_state_dict( - ckpt_paths: List[Path], + ckpt_paths: list[Path], n_kv_heads: int, - moe_num_experts: Optional[int] = None, - map_location: Union[str, torch.device] = "cpu", + moe_num_experts: int | None = None, + map_location: str | torch.device = "cpu", mmap: bool = True, -) -> Dict[str, torch.Tensor]: +) -> dict[str, torch.Tensor]: if str(map_location) == "cpu": torch.set_default_tensor_type(torch.BFloat16Tensor) else: @@ -97,18 +97,18 @@ _MOE_WEIGHT_COLUMN_KEY = {"feed_forward.experts.moe_w_out_eF_D"} def reshard_mp( - state_dicts: List[Dict[str, torch.Tensor]], + state_dicts: list[dict[str, torch.Tensor]], size: int, rank: int, repeat_qk_qv: int = 1, -) -> Dict[str, torch.Tensor]: +) -> dict[str, torch.Tensor]: """ Reshard a list of state dicts into a single state dict given a change in MP size. If the list has more than one state dict, we concatenate the values of the same key across all state dicts. Otherwise, we just slice it for the current MP rank. """ - def concat_or_chunk(tensors: List[torch.Tensor], dim: int) -> torch.Tensor: + def concat_or_chunk(tensors: list[torch.Tensor], dim: int) -> torch.Tensor: if len(tensors) > 1: return torch.cat(tensors, dim=dim) return tensors[0].chunk(size, dim=dim)[rank].clone() @@ -144,7 +144,7 @@ def reshard_mp( column_regex = re.compile("|".join(column_keys)) row_regex = re.compile("|".join(row_keys)) - output: Dict[str, torch.Tensor] = {} + output: dict[str, torch.Tensor] = {} with concurrent.futures.ThreadPoolExecutor() as executor: # Note: only processes keys in the first state dict. # Assumes keys are the same across all state dicts. 
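The checkpoint hunks above only switch to builtin generics, but the resharding contract behind `map_mp_rank` is worth spelling out. Below is a hedged reference sketch of the mapping rule implied by its signature and comments (split an old shard when the MP size grows, merge old shards when it shrinks); it is an illustration under those assumptions, not the library's implementation.

```python
def map_mp_rank_sketch(old_mp_size: int, new_mp_size: int, new_mp_rank: int) -> list[int]:
    """Map a new MP rank to the old MP ranks it must read (reference sketch)."""
    if new_mp_size % old_mp_size == 0:
        # Growing MP size: several new ranks share (and later split) one old shard.
        return [new_mp_rank * old_mp_size // new_mp_size]
    if old_mp_size % new_mp_size == 0:
        # Shrinking MP size: one new rank merges a contiguous run of old shards.
        factor = old_mp_size // new_mp_size
        return list(range(new_mp_rank * factor, (new_mp_rank + 1) * factor))
    raise ValueError("old and new MP sizes must divide one another")


# 2 -> 4: new rank 3 reads (a slice of) old shard 1.
assert map_mp_rank_sketch(2, 4, 3) == [1]
# 4 -> 2: new rank 1 merges old shards 2 and 3.
assert map_mp_rank_sketch(4, 2, 1) == [2, 3]
```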
@@ -154,7 +154,7 @@ def reshard_mp( return output -def convert_moe_weights(state_dict: Dict[str, Any], num_experts: int) -> Dict[str, Any]: +def convert_moe_weights(state_dict: dict[str, Any], num_experts: int) -> dict[str, Any]: routed_keys = _MOE_WEIGHT_ROW_KEY | _MOE_WEIGHT_COLUMN_KEY routed_regex = re.compile("|".join(routed_keys)) keys = list(state_dict.keys()) diff --git a/llama_stack/models/llama/datatypes.py b/llama_stack/models/llama/datatypes.py index 48cb51005..f9f094c3d 100644 --- a/llama_stack/models/llama/datatypes.py +++ b/llama_stack/models/llama/datatypes.py @@ -7,10 +7,9 @@ import base64 from enum import Enum from io import BytesIO -from typing import Any, Dict, List, Literal, Optional, Union +from typing import Annotated, Any, Literal from pydantic import BaseModel, ConfigDict, Field, field_serializer, field_validator -from typing_extensions import Annotated # The goal is that these set of types are relevant for all Llama models. # That isn't the current state yet -- e.g., BuiltinTool is somewhat specific to @@ -31,21 +30,21 @@ class BuiltinTool(Enum): code_interpreter = "code_interpreter" -Primitive = Union[str, int, float, bool, None] -RecursiveType = Union[Primitive, List[Primitive], Dict[str, Primitive]] +Primitive = str | int | float | bool | None +RecursiveType = Primitive | list[Primitive] | dict[str, Primitive] class ToolCall(BaseModel): call_id: str - tool_name: Union[BuiltinTool, str] + tool_name: BuiltinTool | str # Plan is to deprecate the Dict in favor of a JSON string # that is parsed on the client side instead of trying to manage # the recursive type here. # Making this a union so that client side can start prepping for this change. # Eventually, we will remove both the Dict and arguments_json field, # and arguments will just be a str - arguments: Union[str, Dict[str, RecursiveType]] - arguments_json: Optional[str] = None + arguments: str | dict[str, RecursiveType] + arguments_json: str | None = None @field_validator("tool_name", mode="before") @classmethod @@ -91,15 +90,15 @@ class StopReason(Enum): class ToolParamDefinition(BaseModel): param_type: str - description: Optional[str] = None - required: Optional[bool] = True - default: Optional[Any] = None + description: str | None = None + required: bool | None = True + default: Any | None = None class ToolDefinition(BaseModel): - tool_name: Union[BuiltinTool, str] - description: Optional[str] = None - parameters: Optional[Dict[str, ToolParamDefinition]] = None + tool_name: BuiltinTool | str + description: str | None = None + parameters: dict[str, ToolParamDefinition] | None = None @field_validator("tool_name", mode="before") @classmethod @@ -119,7 +118,7 @@ class RawMediaItem(BaseModel): model_config = ConfigDict(arbitrary_types_allowed=True) @field_serializer("data") - def serialize_data(self, data: Optional[bytes], _info): + def serialize_data(self, data: bytes | None, _info): if data is None: return None return base64.b64encode(data).decode("utf-8") @@ -137,9 +136,9 @@ class RawTextItem(BaseModel): text: str -RawContentItem = Annotated[Union[RawTextItem, RawMediaItem], Field(discriminator="type")] +RawContentItem = Annotated[RawTextItem | RawMediaItem, Field(discriminator="type")] -RawContent = str | RawContentItem | List[RawContentItem] +RawContent = str | RawContentItem | list[RawContentItem] class RawMessage(BaseModel): @@ -147,17 +146,17 @@ class RawMessage(BaseModel): content: RawContent # This is for RAG but likely should be absorbed into content - context: Optional[RawContent] = None + context: 
RawContent | None = None # These are for the output message coming from the assistant - stop_reason: Optional[StopReason] = None - tool_calls: List[ToolCall] = Field(default_factory=list) + stop_reason: StopReason | None = None + tool_calls: list[ToolCall] = Field(default_factory=list) class GenerationResult(BaseModel): token: int text: str - logprobs: Optional[List[float]] = None + logprobs: list[float] | None = None source: Literal["input"] | Literal["output"] diff --git a/llama_stack/models/llama/llama3/args.py b/llama_stack/models/llama/llama3/args.py index f7e4b4557..4f92874f5 100644 --- a/llama_stack/models/llama/llama3/args.py +++ b/llama_stack/models/llama/llama3/args.py @@ -6,7 +6,6 @@ from dataclasses import dataclass from enum import Enum -from typing import Optional class QuantizationScheme(Enum): @@ -15,8 +14,8 @@ class QuantizationScheme(Enum): @dataclass class QuantizationArgs: - scheme: Optional[QuantizationScheme] = None - group_size: Optional[int] = None + scheme: QuantizationScheme | None = None + group_size: int | None = None spinquant: bool = False def __init__(self, **kwargs): @@ -39,10 +38,10 @@ class ModelArgs: dim: int = 4096 n_layers: int = 32 n_heads: int = 32 - n_kv_heads: Optional[int] = None + n_kv_heads: int | None = None vocab_size: int = -1 multiple_of: int = 256 # make SwiGLU hidden layer size multiple of large power of 2 - ffn_dim_multiplier: Optional[float] = None + ffn_dim_multiplier: float | None = None norm_eps: float = 1e-5 rope_theta: float = 500000 use_scaled_rope: bool = False @@ -55,8 +54,8 @@ class ModelArgs: vision_max_num_chunks: int = 4 vision_num_cross_attention_layers: int = -1 - quantization_args: Optional[QuantizationArgs] = None - lora_args: Optional[LoRAArgs] = None + quantization_args: QuantizationArgs | None = None + lora_args: LoRAArgs | None = None def __init__(self, **kwargs): for k, v in kwargs.items(): diff --git a/llama_stack/models/llama/llama3/chat_format.py b/llama_stack/models/llama/llama3/chat_format.py index fe7a7a898..7bb05d8db 100644 --- a/llama_stack/models/llama/llama3/chat_format.py +++ b/llama_stack/models/llama/llama3/chat_format.py @@ -8,7 +8,6 @@ import io import json import uuid from dataclasses import dataclass -from typing import Dict, List, Optional, Tuple from PIL import Image as PIL_Image @@ -29,14 +28,14 @@ from .tool_utils import ToolUtils @dataclass class VisionInput: - mask: List[List[int]] - images: List[PIL_Image.Image] + mask: list[list[int]] + images: list[PIL_Image.Image] @dataclass class LLMInput: - tokens: List[int] - vision: Optional[VisionInput] = None + tokens: list[int] + vision: VisionInput | None = None def role_str(role: Role) -> str: @@ -50,7 +49,7 @@ def role_str(role: Role) -> str: class ChatFormat: - possible_headers: Dict[Role, str] + possible_headers: dict[Role, str] def __init__(self, tokenizer: Tokenizer): self.tokenizer = tokenizer @@ -58,7 +57,7 @@ class ChatFormat: self.possible_headers = {role: f"<|start_header_id|>{role_str(role)}<|end_header_id|>\n\n" for role in Role} self.vision_token = self.tokenizer.special_tokens["<|image|>"] - def _encode_header(self, role: str) -> List[int]: + def _encode_header(self, role: str) -> list[int]: tokens = [] tokens.append(self.tokenizer.special_tokens["<|start_header_id|>"]) tokens.extend(self.tokenizer.encode("ipython" if role == "tool" else role, bos=False, eos=False)) @@ -70,7 +69,7 @@ class ChatFormat: tokens, images = self._encode_content(content, bos=True) return self._model_input_from_tokens_images(tokens, images) - def 
_encode_content(self, content: RawContent, bos: bool = False) -> Tuple[List[int], List[PIL_Image.Image]]: + def _encode_content(self, content: RawContent, bos: bool = False) -> tuple[list[int], list[PIL_Image.Image]]: tokens = [] images = [] @@ -107,7 +106,7 @@ class ChatFormat: def encode_message( self, message: RawMessage, tool_prompt_format: ToolPromptFormat - ) -> Tuple[List[int], List[PIL_Image.Image]]: + ) -> tuple[list[int], list[PIL_Image.Image]]: tokens = self._encode_header(message.role) images = [] @@ -145,8 +144,8 @@ class ChatFormat: def encode_dialog_prompt( self, - messages: List[RawMessage], - tool_prompt_format: Optional[ToolPromptFormat] = None, + messages: list[RawMessage], + tool_prompt_format: ToolPromptFormat | None = None, ) -> LLMInput: tool_prompt_format = tool_prompt_format or ToolPromptFormat.json tokens = [] @@ -163,7 +162,7 @@ class ChatFormat: return self._model_input_from_tokens_images(tokens, images) # TODO(this should be generic, not only for assistant messages) - def decode_assistant_message(self, tokens: List[int], stop_reason: StopReason) -> RawMessage: + def decode_assistant_message(self, tokens: list[int], stop_reason: StopReason) -> RawMessage: content = self.tokenizer.decode(tokens) return self.decode_assistant_message_from_content(content, stop_reason) @@ -234,7 +233,7 @@ class ChatFormat: tool_calls=tool_calls, ) - def _model_input_from_tokens_images(self, tokens: List[int], images: List[PIL_Image.Image]) -> LLMInput: + def _model_input_from_tokens_images(self, tokens: list[int], images: list[PIL_Image.Image]) -> LLMInput: vision_input = None if len(images) > 0: vision_input = VisionInput( @@ -249,9 +248,9 @@ class ChatFormat: def create_vision_mask( - tokens: List[int], + tokens: list[int], vision_token: int, -) -> List[List[int]]: +) -> list[list[int]]: vision_token_locations = [i for i, token in enumerate(tokens) if token == vision_token] if len(vision_token_locations) == 0: return [] diff --git a/llama_stack/models/llama/llama3/generation.py b/llama_stack/models/llama/llama3/generation.py index 35c140707..fe7be5ea9 100644 --- a/llama_stack/models/llama/llama3/generation.py +++ b/llama_stack/models/llama/llama3/generation.py @@ -15,8 +15,8 @@ import json import os import sys import time +from collections.abc import Callable, Generator from pathlib import Path -from typing import Callable, Generator, List, Optional import torch import torch.nn.functional as F @@ -41,8 +41,8 @@ class Llama3: ckpt_dir: str, max_seq_len: int, max_batch_size: int, - world_size: Optional[int] = None, - quantization_mode: Optional[QuantizationMode] = None, + world_size: int | None = None, + quantization_mode: QuantizationMode | None = None, seed: int = 1, device: str = "cuda", ): @@ -82,7 +82,7 @@ class Llama3: ckpt_paths = sorted(Path(ckpt_dir).glob("*.pth")) assert len(ckpt_paths) > 0, f"no checkpoint files found in {ckpt_dir}" print(f"Loading a checkpoint (shards={len(ckpt_paths)}, current-mp-size={world_size})") - with open(Path(ckpt_dir) / "params.json", "r") as f: + with open(Path(ckpt_dir) / "params.json") as f: params = json.loads(f.read()) model_args: ModelArgs = ModelArgs( @@ -154,15 +154,15 @@ class Llama3: @torch.inference_mode() def generate( self, - llm_inputs: List[LLMInput], + llm_inputs: list[LLMInput], temperature: float = 0.6, top_p: float = 0.9, - max_gen_len: Optional[int] = None, + max_gen_len: int | None = None, logprobs: bool = False, echo: bool = False, print_model_input: bool = False, - logits_processor: Optional[Callable[[torch.Tensor, 
torch.Tensor], torch.Tensor]] = None, - ) -> Generator[List[GenerationResult], None, None]: + logits_processor: Callable[[torch.Tensor, torch.Tensor], torch.Tensor] | None = None, + ) -> Generator[list[GenerationResult], None, None]: if max_gen_len is None or max_gen_len == 0 or max_gen_len >= self.args.max_seq_len: max_gen_len = self.args.max_seq_len - 1 params = self.model.params @@ -174,6 +174,7 @@ class Llama3: cprint( "Input to model:\n" + self.tokenizer.decode(tokens_to_print) + "\n", "red", + file=sys.stderr, ) prompt_tokens = [inp.tokens for inp in llm_inputs] @@ -184,7 +185,11 @@ class Llama3: max_prompt_len = max(len(t) for t in prompt_tokens) if max_prompt_len >= params.max_seq_len: - cprint(f"Out of token budget {max_prompt_len} vs {params.max_seq_len}", "red") + cprint( + f"Out of token budget {max_prompt_len} vs {params.max_seq_len}", + color="red", + file=sys.stderr, + ) return total_len = min(max_gen_len + max_prompt_len, params.max_seq_len) @@ -302,13 +307,13 @@ class Llama3: def completion( self, - contents: List[RawContent], + contents: list[RawContent], temperature: float = 0.6, top_p: float = 0.9, - max_gen_len: Optional[int] = None, + max_gen_len: int | None = None, logprobs: bool = False, echo: bool = False, - ) -> Generator[List[GenerationResult], None, None]: + ) -> Generator[list[GenerationResult], None, None]: model_inputs = [self.formatter.encode_content(c) for c in contents] for result in self.generate( model_inputs=model_inputs, @@ -324,14 +329,14 @@ class Llama3: def chat_completion( self, - messages_batch: List[List[RawMessage]], + messages_batch: list[list[RawMessage]], temperature: float = 0.6, top_p: float = 0.9, - max_gen_len: Optional[int] = None, + max_gen_len: int | None = None, logprobs: bool = False, tool_prompt_format: ToolPromptFormat = ToolPromptFormat.json, echo: bool = False, - ) -> Generator[List[GenerationResult], None, None]: + ) -> Generator[list[GenerationResult], None, None]: model_inputs = [self.formatter.encode_dialog_prompt(messages) for messages in messages_batch] for result in self.generate( model_inputs=model_inputs, diff --git a/llama_stack/models/llama/llama3/interface.py b/llama_stack/models/llama/llama3/interface.py index 8684237df..b63ba4847 100644 --- a/llama_stack/models/llama/llama3/interface.py +++ b/llama_stack/models/llama/llama3/interface.py @@ -12,7 +12,6 @@ # the top-level of this source tree. 
from pathlib import Path -from typing import List, Optional from termcolor import colored @@ -131,7 +130,7 @@ class LLama31Interface: self.formatter = ChatFormat(self.tokenizer) self.tool_prompt_format = tool_prompt_format - def get_tokens(self, messages: List[RawMessage]) -> List[int]: + def get_tokens(self, messages: list[RawMessage]) -> list[int]: model_input = self.formatter.encode_dialog_prompt( messages, self.tool_prompt_format, @@ -149,10 +148,10 @@ class LLama31Interface: def system_messages( self, - builtin_tools: List[BuiltinTool], - custom_tools: List[ToolDefinition], - instruction: Optional[str] = None, - ) -> List[RawMessage]: + builtin_tools: list[BuiltinTool], + custom_tools: list[ToolDefinition], + instruction: str | None = None, + ) -> list[RawMessage]: messages = [] default_gen = SystemDefaultGenerator() @@ -194,8 +193,8 @@ class LLama31Interface: self, content: str, stop_reason: StopReason, - tool_call: Optional[ToolCall] = None, - ) -> List[RawMessage]: + tool_call: ToolCall | None = None, + ) -> list[RawMessage]: tool_calls = [] if tool_call: tool_calls.append(tool_call) @@ -208,7 +207,7 @@ class LLama31Interface: ) ] - def user_message(self, content: str) -> List[RawMessage]: + def user_message(self, content: str) -> list[RawMessage]: return [RawMessage(role="user", content=content)] def display_message_as_tokens(self, message: RawMessage) -> None: @@ -228,7 +227,7 @@ class LLama31Interface: print("\n", end="") -def list_jinja_templates() -> List[Template]: +def list_jinja_templates() -> list[Template]: return TEMPLATES diff --git a/llama_stack/models/llama/llama3/model.py b/llama_stack/models/llama/llama3/model.py index 2562673e2..88f748c1d 100644 --- a/llama_stack/models/llama/llama3/model.py +++ b/llama_stack/models/llama/llama3/model.py @@ -5,7 +5,6 @@ # the root directory of this source tree. 
import math -from typing import Optional, Tuple import fairscale.nn.model_parallel.initialize as fs_init import torch @@ -80,7 +79,7 @@ def apply_rotary_emb( xq: torch.Tensor, xk: torch.Tensor, freqs_cis: torch.Tensor, -) -> Tuple[torch.Tensor, torch.Tensor]: +) -> tuple[torch.Tensor, torch.Tensor]: xq_ = torch.view_as_complex(xq.float().reshape(*xq.shape[:-1], -1, 2)) xk_ = torch.view_as_complex(xk.float().reshape(*xk.shape[:-1], -1, 2)) freqs_cis = reshape_for_broadcast(freqs_cis, xq_) @@ -162,7 +161,7 @@ class Attention(nn.Module): x: torch.Tensor, start_pos: int, freqs_cis: torch.Tensor, - mask: Optional[torch.Tensor], + mask: torch.Tensor | None, ): bsz, seqlen, _ = x.shape xq, xk, xv = self.wq(x), self.wk(x), self.wv(x) @@ -204,7 +203,7 @@ class FeedForward(nn.Module): dim: int, hidden_dim: int, multiple_of: int, - ffn_dim_multiplier: Optional[float], + ffn_dim_multiplier: float | None, ): super().__init__() hidden_dim = int(2 * hidden_dim / 3) @@ -243,7 +242,7 @@ class TransformerBlock(nn.Module): x: torch.Tensor, start_pos: int, freqs_cis: torch.Tensor, - mask: Optional[torch.Tensor], + mask: torch.Tensor | None, ): h = x + self.attention(self.attention_norm(x), start_pos, freqs_cis, mask) out = h + self.feed_forward(self.ffn_norm(h)) diff --git a/llama_stack/models/llama/llama3/multimodal/image_transform.py b/llama_stack/models/llama/llama3/multimodal/image_transform.py index c156d6d2e..f2761ee47 100644 --- a/llama_stack/models/llama/llama3/multimodal/image_transform.py +++ b/llama_stack/models/llama/llama3/multimodal/image_transform.py @@ -14,7 +14,7 @@ import math from collections import defaultdict from logging import getLogger -from typing import Any, Optional, Set, Tuple +from typing import Any import torch import torchvision.transforms as tv @@ -26,7 +26,7 @@ IMAGE_RES = 224 logger = getLogger() -class VariableSizeImageTransform(object): +class VariableSizeImageTransform: """ This class accepts images of any size and dynamically resize, pads and chunks it based on the image aspect ratio and the number of image chunks we allow. @@ -75,7 +75,7 @@ class VariableSizeImageTransform(object): self.resample = tv.InterpolationMode.BILINEAR @staticmethod - def get_factors(n: int) -> Set[int]: + def get_factors(n: int) -> set[int]: """ Calculate all factors of a given number, i.e. a dividor that leaves no remainder. For example, if n=12, it will return {1, 2, 3, 4, 6, 12}. @@ -145,9 +145,9 @@ class VariableSizeImageTransform(object): @staticmethod def get_max_res_without_distortion( - image_size: Tuple[int, int], - target_size: Tuple[int, int], - ) -> Tuple[int, int]: + image_size: tuple[int, int], + target_size: tuple[int, int], + ) -> tuple[int, int]: """ Determines the maximum resolution to which an image can be resized to without distorting its aspect ratio, based on the target resolution. @@ -198,8 +198,8 @@ class VariableSizeImageTransform(object): def resize_without_distortion( self, image: torch.Tensor, - target_size: Tuple[int, int], - max_upscaling_size: Optional[int], + target_size: tuple[int, int], + max_upscaling_size: int | None, ) -> torch.Tensor: """ Used to resize an image to target_resolution, without distortion. 
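The `get_factors` hunk above only changes the return annotation (`Set[int]` to `set[int]`); its docstring documents the behaviour with a concrete example (12 yields {1, 2, 3, 4, 6, 12}). The body is not shown in this patch, but a divisor computation matching that docstring could look like the following sketch:

```python
import math


def get_factors(n: int) -> set[int]:
    # Collect every divisor of n, e.g. get_factors(12) -> {1, 2, 3, 4, 6, 12}
    factors: set[int] = set()
    for i in range(1, math.isqrt(n) + 1):
        if n % i == 0:
            factors.add(i)
            factors.add(n // i)
    return factors


assert get_factors(12) == {1, 2, 3, 4, 6, 12}
```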
@@ -261,10 +261,10 @@ class VariableSizeImageTransform(object): def get_best_fit( self, - image_size: Tuple[int, int], + image_size: tuple[int, int], possible_resolutions: torch.Tensor, resize_to_max_canvas: bool = False, - ) -> Tuple[int, int]: + ) -> tuple[int, int]: """ Determines the best canvas possible from a list of possible resolutions to, without distortion, resize an image to. @@ -364,7 +364,7 @@ class VariableSizeImageTransform(object): max_num_chunks: int, normalize_img: bool = True, resize_to_max_canvas: bool = False, - ) -> Tuple[Any, Any]: + ) -> tuple[Any, Any]: """ Args: image (PIL.Image): Image to be resized. diff --git a/llama_stack/models/llama/llama3/multimodal/model.py b/llama_stack/models/llama/llama3/multimodal/model.py index 0cb18b948..5f1c3605c 100644 --- a/llama_stack/models/llama/llama3/multimodal/model.py +++ b/llama_stack/models/llama/llama3/multimodal/model.py @@ -6,8 +6,9 @@ import logging import math +from collections.abc import Callable from functools import partial -from typing import Any, Callable, Dict, List, Optional, Tuple, Union +from typing import Any import fairscale.nn.model_parallel.initialize as fs_init import torch @@ -104,9 +105,9 @@ class ColumnParallelConv2dPatch(torch.nn.Module): self, in_channels: int, out_channels: int, - kernel_size: Union[int, Tuple[int, int]], - stride: Union[int, Tuple[int, int]], - bias: Optional[bool] = False, + kernel_size: int | tuple[int, int], + stride: int | tuple[int, int], + bias: bool | None = False, ) -> None: super().__init__() if isinstance(kernel_size, int): @@ -390,13 +391,13 @@ class VisionEncoder(nn.Module): def load_hook( self, - state_dict: Dict[str, Any], + state_dict: dict[str, Any], prefix: str, - local_metadata: Dict[str, Any], + local_metadata: dict[str, Any], strict: bool = True, - missing_keys: List[str] = None, - unexpected_keys: List[str] = None, - error_msgs: List[str] = None, + missing_keys: list[str] = None, + unexpected_keys: list[str] = None, + error_msgs: list[str] = None, return_state_dict: bool = False, ) -> None: orig_pos_embed = state_dict.get(prefix + "positional_embedding") @@ -641,7 +642,7 @@ class FeedForward(nn.Module): dim: int, hidden_dim: int, multiple_of: int, - ffn_dim_multiplier: Optional[float], + ffn_dim_multiplier: float | None, ): """ Initialize the FeedForward module. 
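The `get_max_res_without_distortion` hunks a little earlier in this file likewise only touch annotations; the docstring describes choosing the largest size that fits inside a target resolution while preserving the aspect ratio. A simplified sketch of that computation is below; the (height, width) ordering and the rounding are assumptions for illustration and may differ from the actual implementation:

```python
def max_res_without_distortion(
    image_size: tuple[int, int],
    target_size: tuple[int, int],
) -> tuple[int, int]:
    # Scale by the most constraining dimension so the result fits inside the
    # target box without changing the aspect ratio.
    img_h, img_w = image_size
    tgt_h, tgt_w = target_size
    scale = min(tgt_h / img_h, tgt_w / img_w)
    return int(img_h * scale), int(img_w * scale)


# e.g. fitting a 200x300 image into a 448x448 canvas -> (298, 448)
print(max_res_without_distortion((200, 300), (448, 448)))
```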
@@ -983,7 +984,7 @@ class CrossAttentionTransformerBlock(torch.nn.Module): self, x: torch.Tensor, xattn_mask: torch.Tensor, - full_text_row_masked_out_mask: Tuple[torch.Tensor, torch.Tensor], + full_text_row_masked_out_mask: tuple[torch.Tensor, torch.Tensor], xattn_cache: torch.Tensor, ) -> torch.Tensor: _attn_out = self.attention( @@ -1144,7 +1145,7 @@ class CrossAttentionTransformerText(torch.nn.Module): def _init_fusion_schedule( self, num_layers: int, - ) -> List[int]: + ) -> list[int]: llama_layers = list(range(self.n_llama_layers)) # uniformly spread the layers @@ -1231,7 +1232,7 @@ class CrossAttentionTransformerText(torch.nn.Module): text_dtype, vision_tokens, cross_attention_masks, - ) -> Tuple[Tensor, Tensor]: + ) -> tuple[Tensor, Tensor]: assert vision_tokens is not None, "Vision tokens must be provided" vision_seqlen = vision_tokens.shape[3] assert vision_tokens.shape[1] == cross_attention_masks.shape[2], ( @@ -1280,11 +1281,11 @@ class CrossAttentionTransformer(torch.nn.Module): def compute_vision_tokens_masks( self, - batch_images: List[List[PIL_Image.Image]], - batch_masks: List[List[List[int]]], + batch_images: list[list[PIL_Image.Image]], + batch_masks: list[list[list[int]]], total_len: int, device: torch.device, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]: skip_vision_encoder = False assert len(batch_images) == len(batch_masks), "Images and masks must have the same length" @@ -1371,11 +1372,11 @@ class CrossAttentionTransformer(torch.nn.Module): def _stack_images( - images: List[List[PIL_Image.Image]], + images: list[list[PIL_Image.Image]], max_num_chunks: int, image_res: int, max_num_images: int, -) -> Tuple[torch.Tensor, List[int]]: +) -> tuple[torch.Tensor, list[int]]: """ Takes a list of list of images and stacks them into a tensor. This function is needed since images can be of completely @@ -1400,8 +1401,8 @@ def _stack_images( def _pad_masks( - all_masks: List[List[List[int]]], - all_num_chunks: List[List[int]], + all_masks: list[list[list[int]]], + all_num_chunks: list[list[int]], total_len: int, max_num_chunks: int, ) -> torch.Tensor: diff --git a/llama_stack/models/llama/llama3/prompt_templates/base.py b/llama_stack/models/llama/llama3/prompt_templates/base.py index bff2a21e1..0081443be 100644 --- a/llama_stack/models/llama/llama3/prompt_templates/base.py +++ b/llama_stack/models/llama/llama3/prompt_templates/base.py @@ -12,7 +12,7 @@ # the top-level of this source tree. 
from dataclasses import dataclass -from typing import Any, Dict, List +from typing import Any from jinja2 import Template @@ -20,7 +20,7 @@ from jinja2 import Template @dataclass class PromptTemplate: template: str - data: Dict[str, Any] + data: dict[str, Any] def render(self): template = Template(self.template) @@ -35,5 +35,5 @@ class PromptTemplateGeneratorBase: def gen(self, *args, **kwargs) -> PromptTemplate: raise NotImplementedError() - def data_examples(self) -> List[Any]: + def data_examples(self) -> list[Any]: raise NotImplementedError() diff --git a/llama_stack/models/llama/llama3/prompt_templates/system_prompts.py b/llama_stack/models/llama/llama3/prompt_templates/system_prompts.py index fbc0127fd..ab626e5af 100644 --- a/llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +++ b/llama_stack/models/llama/llama3/prompt_templates/system_prompts.py @@ -13,7 +13,7 @@ import textwrap from datetime import datetime -from typing import Any, List, Optional +from typing import Any from llama_stack.apis.inference import ( BuiltinTool, @@ -39,12 +39,12 @@ class SystemDefaultGenerator(PromptTemplateGeneratorBase): }, ) - def data_examples(self) -> List[Any]: + def data_examples(self) -> list[Any]: return [None] class BuiltinToolGenerator(PromptTemplateGeneratorBase): - def _tool_breakdown(self, tools: List[ToolDefinition]): + def _tool_breakdown(self, tools: list[ToolDefinition]): builtin_tools, custom_tools = [], [] for dfn in tools: if isinstance(dfn.tool_name, BuiltinTool): @@ -54,7 +54,7 @@ class BuiltinToolGenerator(PromptTemplateGeneratorBase): return builtin_tools, custom_tools - def gen(self, tools: List[ToolDefinition]) -> PromptTemplate: + def gen(self, tools: list[ToolDefinition]) -> PromptTemplate: builtin_tools, custom_tools = self._tool_breakdown(tools) template_str = textwrap.dedent( """ @@ -75,7 +75,7 @@ class BuiltinToolGenerator(PromptTemplateGeneratorBase): }, ) - def data_examples(self) -> List[List[ToolDefinition]]: + def data_examples(self) -> list[list[ToolDefinition]]: return [ # builtin tools [ @@ -91,7 +91,7 @@ class BuiltinToolGenerator(PromptTemplateGeneratorBase): class JsonCustomToolGenerator(PromptTemplateGeneratorBase): - def gen(self, custom_tools: List[ToolDefinition]) -> PromptTemplate: + def gen(self, custom_tools: list[ToolDefinition]) -> PromptTemplate: template_str = textwrap.dedent( """ Answer the user's question by making use of the following functions if needed. 
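The `PromptTemplate` hunk above keeps the same Jinja2-backed dataclass (a `template` string plus a `data` dict consumed by `render()`); only the `data` annotation changes. A small usage sketch, with a made-up template and values purely for illustration, assuming `render()` expands `data` into the template as its body suggests:

```python
from llama_stack.models.llama.llama3.prompt_templates.base import PromptTemplate

# Hypothetical template/data for illustration only.
template = PromptTemplate(
    template="Today is {{ today }}. Available tools: {{ tools | join(', ') }}.",
    data={"today": "2025-01-01", "tools": ["brave_search", "wolfram_alpha"]},
)
print(template.render())
# -> Today is 2025-01-01. Available tools: brave_search, wolfram_alpha.
```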
@@ -137,7 +137,7 @@ class JsonCustomToolGenerator(PromptTemplateGeneratorBase): {"custom_tools": [t.model_dump() for t in custom_tools]}, ) - def data_examples(self) -> List[List[ToolDefinition]]: + def data_examples(self) -> list[list[ToolDefinition]]: return [ [ ToolDefinition( @@ -161,7 +161,7 @@ class JsonCustomToolGenerator(PromptTemplateGeneratorBase): class FunctionTagCustomToolGenerator(PromptTemplateGeneratorBase): - def gen(self, custom_tools: List[ToolDefinition]) -> PromptTemplate: + def gen(self, custom_tools: list[ToolDefinition]) -> PromptTemplate: template_str = textwrap.dedent( """ You have access to the following functions: @@ -199,7 +199,7 @@ class FunctionTagCustomToolGenerator(PromptTemplateGeneratorBase): {"custom_tools": [t.model_dump() for t in custom_tools]}, ) - def data_examples(self) -> List[List[ToolDefinition]]: + def data_examples(self) -> list[list[ToolDefinition]]: return [ [ ToolDefinition( @@ -238,14 +238,14 @@ class PythonListCustomToolGenerator(PromptTemplateGeneratorBase): # noqa: N801 """.strip("\n") ) - def gen(self, custom_tools: List[ToolDefinition], system_prompt: Optional[str] = None) -> PromptTemplate: + def gen(self, custom_tools: list[ToolDefinition], system_prompt: str | None = None) -> PromptTemplate: system_prompt = system_prompt or self.DEFAULT_PROMPT return PromptTemplate( system_prompt, {"function_description": self._gen_function_description(custom_tools)}, ) - def _gen_function_description(self, custom_tools: List[ToolDefinition]) -> PromptTemplate: + def _gen_function_description(self, custom_tools: list[ToolDefinition]) -> str: template_str = textwrap.dedent( """ Here is a list of functions in JSON format that you can invoke. @@ -286,12 +286,14 @@ class PythonListCustomToolGenerator(PromptTemplateGeneratorBase): # noqa: N801 """ ) - return PromptTemplate( + template = PromptTemplate( template_str.strip("\n"), {"tools": [t.model_dump() for t in custom_tools]}, - ).render() + ) + rendered: str = template.render() + return rendered - def data_examples(self) -> List[List[ToolDefinition]]: + def data_examples(self) -> list[list[ToolDefinition]]: return [ [ ToolDefinition( diff --git a/llama_stack/models/llama/llama3/prompt_templates/tool_response.py b/llama_stack/models/llama/llama3/prompt_templates/tool_response.py index 3df4dac14..4da171279 100644 --- a/llama_stack/models/llama/llama3/prompt_templates/tool_response.py +++ b/llama_stack/models/llama/llama3/prompt_templates/tool_response.py @@ -12,7 +12,6 @@ # the top-level of this source tree. 
import textwrap -from typing import Optional from .base import PromptTemplate, PromptTemplateGeneratorBase @@ -21,8 +20,8 @@ class ToolResponseGenerator(PromptTemplateGeneratorBase): def gen( self, status: str, - stdout: Optional[str] = None, - stderr: Optional[str] = None, + stdout: str | None = None, + stderr: str | None = None, ): assert status in [ "success", diff --git a/llama_stack/models/llama/llama3/quantization/loader.py b/llama_stack/models/llama/llama3/quantization/loader.py index 771fd02be..436cfa6fa 100644 --- a/llama_stack/models/llama/llama3/quantization/loader.py +++ b/llama_stack/models/llama/llama3/quantization/loader.py @@ -6,7 +6,7 @@ # type: ignore import os -from typing import Any, Dict, List, Optional, cast +from typing import Any, cast import torch from fairscale.nn.model_parallel.initialize import get_model_parallel_rank @@ -37,9 +37,9 @@ def swiglu_wrapper( def convert_to_quantized_model( model: Transformer | CrossAttentionTransformer, checkpoint_dir: str, - quantization_mode: Optional[str] = None, - fp8_activation_scale_ub: Optional[float] = 1200.0, - device: Optional[torch.device] = None, + quantization_mode: str | None = None, + fp8_activation_scale_ub: float | None = 1200.0, + device: torch.device | None = None, ) -> Transformer | CrossAttentionTransformer: if quantization_mode == QuantizationMode.fp8_mixed: return convert_to_fp8_quantized_model(model, checkpoint_dir, fp8_activation_scale_ub, device) @@ -52,8 +52,8 @@ def convert_to_quantized_model( def convert_to_fp8_quantized_model( model: Transformer, checkpoint_dir: str, - fp8_activation_scale_ub: Optional[float] = 1200.0, - device: Optional[torch.device] = None, + fp8_activation_scale_ub: float | None = 1200.0, + device: torch.device | None = None, ) -> Transformer: # Move weights to GPU with quantization fp8_scales_path = os.path.join(checkpoint_dir, f"fp8_scales_{get_model_parallel_rank()}.pt") @@ -122,8 +122,8 @@ class Int8DynActInt4WeightLinearLoRA(Int8DynActInt4WeightLinear): precision: torch.dtype = torch.float32, scales_precision: torch.dtype = torch.float32, # LoRA parameters - lora_rank: Optional[int] = None, - lora_scale: Optional[float] = None, + lora_rank: int | None = None, + lora_scale: float | None = None, ) -> None: super().__init__( in_features, @@ -134,8 +134,8 @@ class Int8DynActInt4WeightLinearLoRA(Int8DynActInt4WeightLinear): precision=precision, scales_precision=scales_precision, ) - self.lora_scale: Optional[float] = None - self.adaptor: Optional[nn.Sequential] = None + self.lora_scale: float | None = None + self.adaptor: nn.Sequential | None = None if lora_rank is not None: assert lora_scale is not None, "Please specify lora scale for LoRA." # Low-rank adaptation. 
See paper for more details: https://arxiv.org/abs/2106.09685 @@ -147,13 +147,13 @@ class Int8DynActInt4WeightLinearLoRA(Int8DynActInt4WeightLinear): def load_hook( self, - state_dict: Dict[str, Any], + state_dict: dict[str, Any], prefix: str, - local_metadata: Dict[str, Any], + local_metadata: dict[str, Any], strict: bool, - missing_keys: List[str], - unexpected_keys: List[str], - error_msgs: List[str], + missing_keys: list[str], + unexpected_keys: list[str], + error_msgs: list[str], ) -> None: """A hook to load the quantized weights from the state dict.""" if prefix + "zeros" not in state_dict: @@ -191,13 +191,13 @@ class Int8WeightEmbedding(torch.nn.Embedding): def load_hook( self, - state_dict: Dict[str, Any], + state_dict: dict[str, Any], prefix: str, - local_metadata: Dict[str, Any], + local_metadata: dict[str, Any], strict: bool, - missing_keys: List[str], - unexpected_keys: List[str], - error_msgs: List[str], + missing_keys: list[str], + unexpected_keys: list[str], + error_msgs: list[str], ) -> None: """A hook to load the quantized embedding weight and scales from the state dict.""" weights = state_dict.pop(prefix + "weight") @@ -221,13 +221,13 @@ class Int8WeightLinear(torch.nn.Linear): def load_hook( self, - state_dict: Dict[str, Any], + state_dict: dict[str, Any], prefix: str, - local_metadata: Dict[str, Any], + local_metadata: dict[str, Any], strict: bool, - missing_keys: List[str], - unexpected_keys: List[str], - error_msgs: List[str], + missing_keys: list[str], + unexpected_keys: list[str], + error_msgs: list[str], ) -> None: """A hook to load the quantized linear weight and scales from the state dict.""" weights = state_dict.pop(prefix + "weight") @@ -238,8 +238,8 @@ class Int8WeightLinear(torch.nn.Linear): def _prepare_model_int4_weight_int8_dynamic_activation( model: torch.nn.Module, group_size: int, - lora_rank: Optional[int], - lora_scale: Optional[float], + lora_rank: int | None, + lora_scale: float | None, ): """Prepare the model for int4 weight and int8 dynamic activation quantization. @@ -265,7 +265,7 @@ def _prepare_model_int4_weight_int8_dynamic_activation( ) del module setattr(model, module_name, quantized_module) - elif isinstance(module, (ColumnParallelLinear, RowParallelLinear, nn.Linear)): + elif isinstance(module, ColumnParallelLinear | RowParallelLinear | nn.Linear): quantized_module = Int8DynActInt4WeightLinearLoRA( in_features=module.in_features, out_features=module.out_features, @@ -286,7 +286,7 @@ def _prepare_model_int4_weight_int8_dynamic_activation( def convert_to_int4_quantized_model( model: Transformer | CrossAttentionTransformer, checkpoint_dir: str, - device: Optional[torch.device] = None, + device: torch.device | None = None, ) -> Transformer | CrossAttentionTransformer: """Convert the model to int4 quantized model.""" model_args = model.params diff --git a/llama_stack/models/llama/llama3/tokenizer.py b/llama_stack/models/llama/llama3/tokenizer.py index d3cc4fc07..e5ada3599 100644 --- a/llama_stack/models/llama/llama3/tokenizer.py +++ b/llama_stack/models/llama/llama3/tokenizer.py @@ -5,18 +5,11 @@ # the root directory of this source tree. import os +from collections.abc import Collection, Iterator, Sequence, Set from logging import getLogger from pathlib import Path from typing import ( - AbstractSet, - Collection, - Dict, - Iterator, - List, Literal, - Optional, - Sequence, - Union, cast, ) @@ -44,7 +37,7 @@ class Tokenizer: Tokenizing and encoding/decoding text using the Tiktoken tokenizer. 
""" - special_tokens: Dict[str, int] + special_tokens: dict[str, int] num_reserved_special_tokens = 256 @@ -116,9 +109,9 @@ class Tokenizer: *, bos: bool, eos: bool, - allowed_special: Optional[Union[Literal["all"], AbstractSet[str]]] = None, - disallowed_special: Union[Literal["all"], Collection[str]] = (), - ) -> List[int]: + allowed_special: Literal["all"] | Set[str] | None = None, + disallowed_special: Literal["all"] | Collection[str] = (), + ) -> list[int]: """ Encodes a string into a list of token IDs. @@ -151,7 +144,7 @@ class Tokenizer: s[i : i + TIKTOKEN_MAX_ENCODE_CHARS], MAX_NO_WHITESPACES_CHARS ) ) - t: List[int] = [] + t: list[int] = [] for substr in substrs: t.extend( self.model.encode( @@ -177,7 +170,7 @@ class Tokenizer: str: The decoded string. """ # Typecast is safe here. Tiktoken doesn't do anything list-related with the sequence. - return self.model.decode(cast(List[int], t)) + return self.model.decode(cast(list[int], t)) @staticmethod def _split_whitespaces_or_nonwhitespaces(s: str, max_consecutive_slice_len: int) -> Iterator[str]: diff --git a/llama_stack/models/llama/llama3/tool_utils.py b/llama_stack/models/llama/llama3/tool_utils.py index ef39ba0a5..574080184 100644 --- a/llama_stack/models/llama/llama3/tool_utils.py +++ b/llama_stack/models/llama/llama3/tool_utils.py @@ -6,7 +6,6 @@ import json import re -from typing import Optional, Tuple from llama_stack.log import get_logger @@ -172,7 +171,7 @@ class ToolUtils: return match is not None @staticmethod - def maybe_extract_builtin_tool_call(message_body: str) -> Optional[Tuple[str, str]]: + def maybe_extract_builtin_tool_call(message_body: str) -> tuple[str, str] | None: # Find the first match in the text match = re.search(BUILTIN_TOOL_PATTERN, message_body) @@ -185,7 +184,7 @@ class ToolUtils: return None @staticmethod - def maybe_extract_custom_tool_call(message_body: str) -> Optional[Tuple[str, str]]: + def maybe_extract_custom_tool_call(message_body: str) -> tuple[str, str] | None: # NOTE: Custom function too calls are still experimental # Sometimes, response is of the form # {"type": "function", "name": "function_name", "parameters": {...} @@ -204,7 +203,9 @@ class ToolUtils: return None elif is_json(message_body): response = json.loads(message_body) - if ("type" in response and response["type"] == "function") or ("name" in response): + if ("type" in response and response["type"] == "function") or ( + "name" in response and "parameters" in response + ): function_name = response["name"] args = response["parameters"] return function_name, args @@ -250,7 +251,7 @@ class ToolUtils: def format_value(value: RecursiveType) -> str: if isinstance(value, str): return f'"{value}"' - elif isinstance(value, (int, float, bool)) or value is None: + elif isinstance(value, int | float | bool) or value is None: return str(value) elif isinstance(value, list): return f"[{', '.join(format_value(v) for v in value)}]" diff --git a/llama_stack/models/llama/llama3_1/prompts.py b/llama_stack/models/llama/llama3_1/prompts.py index 9dcc51dc8..579a5ee02 100644 --- a/llama_stack/models/llama/llama3_1/prompts.py +++ b/llama_stack/models/llama/llama3_1/prompts.py @@ -12,7 +12,6 @@ # the top-level of this source tree. 
import textwrap -from typing import List from llama_stack.models.llama.datatypes import ( BuiltinTool, @@ -73,7 +72,7 @@ def wolfram_alpha_response(): ) -def usecases() -> List[UseCase | str]: +def usecases() -> list[UseCase | str]: return [ textwrap.dedent( """ diff --git a/llama_stack/models/llama/llama3_3/prompts.py b/llama_stack/models/llama/llama3_3/prompts.py index 194e4fa26..60349e578 100644 --- a/llama_stack/models/llama/llama3_3/prompts.py +++ b/llama_stack/models/llama/llama3_3/prompts.py @@ -12,7 +12,6 @@ # the top-level of this source tree. import textwrap -from typing import List from llama_stack.models.llama.datatypes import ( BuiltinTool, @@ -74,7 +73,7 @@ def wolfram_alpha_response(): ) -def usecases() -> List[UseCase | str]: +def usecases() -> list[UseCase | str]: return [ textwrap.dedent( """ diff --git a/llama_stack/models/llama/llama4/args.py b/llama_stack/models/llama/llama4/args.py index dd5f7cbde..523d6ed10 100644 --- a/llama_stack/models/llama/llama4/args.py +++ b/llama_stack/models/llama/llama4/args.py @@ -5,7 +5,6 @@ # the root directory of this source tree. from enum import Enum -from typing import Optional from pydantic import BaseModel, model_validator @@ -15,8 +14,8 @@ class QuantizationScheme(Enum): class QuantizationArgs(BaseModel): - scheme: Optional[QuantizationScheme] = None - group_size: Optional[int] = None + scheme: QuantizationScheme | None = None + group_size: int | None = None spinquant: bool = False @@ -58,32 +57,32 @@ class ModelArgs(BaseModel): dim: int = -1 n_layers: int = -1 n_heads: int = -1 - n_kv_heads: Optional[int] = None - head_dim: Optional[int] = None + n_kv_heads: int | None = None + head_dim: int | None = None vocab_size: int = -1 multiple_of: int = 256 # make SwiGLU hidden layer size multiple of large power of 2 - ffn_dim_multiplier: Optional[float] = None - ffn_exp: Optional[float] = None + ffn_dim_multiplier: float | None = None + ffn_exp: float | None = None norm_eps: float = 1e-5 - attention_chunk_size: Optional[int] = None + attention_chunk_size: int | None = None rope_theta: float = 500000 use_scaled_rope: bool = False - rope_scaling_factor: Optional[float] = None - rope_high_freq_factor: Optional[float] = None + rope_scaling_factor: float | None = None + rope_high_freq_factor: float | None = None - nope_layer_interval: Optional[int] = None # No position encoding in every n layers + nope_layer_interval: int | None = None # No position encoding in every n layers use_qk_norm: bool = False # Set to True to enable inference-time temperature tuning (useful for very long context) attn_temperature_tuning: bool = False floor_scale: float = 8192.0 attn_scale: float = 0.1 - vision_args: Optional[VisionArgs] = None - moe_args: Optional[MoEArgs] = None - quantization_args: Optional[QuantizationArgs] = None - lora_args: Optional[LoRAArgs] = None + vision_args: VisionArgs | None = None + moe_args: MoEArgs | None = None + quantization_args: QuantizationArgs | None = None + lora_args: LoRAArgs | None = None max_batch_size: int = 32 max_seq_len: int = 2048 diff --git a/llama_stack/models/llama/llama4/chat_format.py b/llama_stack/models/llama/llama4/chat_format.py index 9d60d00e9..96ebd0881 100644 --- a/llama_stack/models/llama/llama4/chat_format.py +++ b/llama_stack/models/llama/llama4/chat_format.py @@ -5,9 +5,9 @@ # the root directory of this source tree. 
import io +import json import uuid from dataclasses import dataclass -from typing import Dict, List, Optional, Tuple import torch from PIL import Image as PIL_Image @@ -45,10 +45,10 @@ def role_str(role: Role) -> str: class TransformedImage: image_tiles: torch.Tensor # is the aspect ratio needed anywhere? - aspect_ratio: Tuple[int, int] + aspect_ratio: tuple[int, int] -def convert_image_to_rgb(image: PIL_Image.Image, bg: Tuple[int, int, int] = (255, 255, 255)) -> PIL_Image.Image: +def convert_image_to_rgb(image: PIL_Image.Image, bg: tuple[int, int, int] = (255, 255, 255)) -> PIL_Image.Image: if image.mode == "RGBA": image.load() # for png.split() new_img = PIL_Image.new("RGB", image.size, bg) @@ -58,12 +58,12 @@ def convert_image_to_rgb(image: PIL_Image.Image, bg: Tuple[int, int, int] = (255 class ChatFormat: - possible_headers: Dict[Role, str] + possible_headers: dict[Role, str] def __init__( self, tokenizer: Tokenizer, - vision_args: Optional[VisionArgs] = None, + vision_args: VisionArgs | None = None, max_num_chunks: int = 16, ): self.tokenizer = tokenizer @@ -80,7 +80,7 @@ class ChatFormat: vision_args.image_size.width, vision_args.image_size.height ) - def _encode_header(self, role: str) -> List[int]: + def _encode_header(self, role: str) -> list[int]: tokens = [] tokens.append(self.tokenizer.special_tokens["<|header_start|>"]) @@ -97,7 +97,7 @@ class ChatFormat: def _encode_image( self, transformed_image: TransformedImage, - ) -> List[int]: + ) -> list[int]: assert self.vision_args is not None, "The model is not vision-enabled" image_tensor = transformed_image.image_tiles @@ -139,7 +139,7 @@ class ChatFormat: return tokens - def _encode_content(self, content: RawContent, bos: bool = False) -> Tuple[List[int], List[TransformedImage]]: + def _encode_content(self, content: RawContent, bos: bool = False) -> tuple[list[int], list[TransformedImage]]: tokens = [] tranformed_images = [] @@ -188,7 +188,7 @@ class ChatFormat: def encode_message( self, message: RawMessage, tool_prompt_format: ToolPromptFormat - ) -> Tuple[List[int], List[TransformedImage]]: + ) -> tuple[list[int], list[TransformedImage]]: tokens = self._encode_header(message.role) images = [] @@ -222,7 +222,7 @@ class ChatFormat: def encode_dialog_prompt( self, - messages: List[RawMessage], + messages: list[RawMessage], tool_prompt_format: ToolPromptFormat = ToolPromptFormat.json, ) -> LLMInput: tokens = [] @@ -239,7 +239,7 @@ class ChatFormat: return self._model_input_from_tokens_images(tokens, images) # TODO(this should be generic, not only for assistant messages) - def decode_assistant_message(self, tokens: List[int], stop_reason: StopReason) -> RawMessage: + def decode_assistant_message(self, tokens: list[int], stop_reason: StopReason) -> RawMessage: content = self.tokenizer.decode(tokens) return self.decode_assistant_message_from_content(content, stop_reason) @@ -299,8 +299,10 @@ class ChatFormat: call_id=call_id, tool_name=tool_name, arguments=tool_arguments, + arguments_json=json.dumps(tool_arguments), ) ) + content = "" return RawMessage( role="assistant", @@ -309,7 +311,7 @@ class ChatFormat: tool_calls=tool_calls, ) - def _model_input_from_tokens_images(self, tokens: List[int], images: List[TransformedImage]) -> LLMInput: + def _model_input_from_tokens_images(self, tokens: list[int], images: list[TransformedImage]) -> LLMInput: return LLMInput( tokens=tokens, images=[x.image_tiles for x in images] if len(images) > 0 else None, diff --git a/llama_stack/models/llama/llama4/datatypes.py 
b/llama_stack/models/llama/llama4/datatypes.py index 27174db63..24d8ae948 100644 --- a/llama_stack/models/llama/llama4/datatypes.py +++ b/llama_stack/models/llama/llama4/datatypes.py @@ -5,7 +5,6 @@ # the root directory of this source tree. from dataclasses import dataclass -from typing import List, Optional, Union import torch @@ -30,7 +29,7 @@ class LLMInput: tokens: torch.Tensor # images are already pre-processed (resized, tiled, etc.) - images: Optional[List[torch.Tensor]] = None + images: list[torch.Tensor] | None = None @dataclass @@ -45,8 +44,8 @@ class TransformerInput: # tokens_position defines the position of the tokens in each batch, # - when it is a tensor ([batch_size,]), it is the start position of the tokens in each batch # - when it is an int, the start position are the same for all batches - tokens_position: Union[torch.Tensor, int] - image_embedding: Optional[MaskedEmbedding] = None + tokens_position: torch.Tensor | int + image_embedding: MaskedEmbedding | None = None @dataclass diff --git a/llama_stack/models/llama/llama4/ffn.py b/llama_stack/models/llama/llama4/ffn.py index 9c9fca5fc..6584f1a2a 100644 --- a/llama_stack/models/llama/llama4/ffn.py +++ b/llama_stack/models/llama/llama4/ffn.py @@ -11,7 +11,7 @@ # top-level folder for each specific model found within the models/ directory at # the top-level of this source tree. -from typing import Any, Dict, List +from typing import Any from fairscale.nn.model_parallel.layers import ColumnParallelLinear, RowParallelLinear from fairscale.nn.model_parallel.mappings import reduce_from_model_parallel_region @@ -36,13 +36,13 @@ class FeedForward(nn.Module): def load_hook( self, - state_dict: Dict[str, Any], + state_dict: dict[str, Any], prefix: str, - local_metadata: Dict[str, Any], + local_metadata: dict[str, Any], strict: bool, - missing_keys: List[str], - unexpected_keys: List[str], - error_msgs: List[str], + missing_keys: list[str], + unexpected_keys: list[str], + error_msgs: list[str], ) -> None: if prefix + "mlp.fc1_weight" in state_dict: w1, w3 = state_dict.pop(prefix + "mlp.fc1_weight").chunk(2, dim=0) diff --git a/llama_stack/models/llama/llama4/generation.py b/llama_stack/models/llama/llama4/generation.py index 8e94bb33a..6132d25d4 100644 --- a/llama_stack/models/llama/llama4/generation.py +++ b/llama_stack/models/llama/llama4/generation.py @@ -10,8 +10,8 @@ import json import os import sys import time +from collections.abc import Callable, Generator from pathlib import Path -from typing import Callable, Generator, List, Optional import torch import torch.nn.functional as F @@ -38,8 +38,8 @@ class Llama4: ckpt_dir: str, max_seq_len: int, max_batch_size: int, - world_size: Optional[int] = None, - quantization_mode: Optional[QuantizationMode] = None, + world_size: int | None = None, + quantization_mode: QuantizationMode | None = None, seed: int = 1, ): if not torch.distributed.is_initialized(): @@ -63,7 +63,7 @@ class Llama4: ckpt_paths = sorted(Path(ckpt_dir).glob("*.pth")) assert len(ckpt_paths) > 0, f"no checkpoint files found in {ckpt_dir}" print(f"Loading a checkpoint (shards={len(ckpt_paths)}, current-mp-size={world_size})") - with open(Path(ckpt_dir) / "params.json", "r") as f: + with open(Path(ckpt_dir) / "params.json") as f: params = json.loads(f.read()) model_args: ModelArgs = ModelArgs( @@ -117,15 +117,15 @@ class Llama4: @torch.inference_mode() def generate( self, - llm_inputs: List[LLMInput], + llm_inputs: list[LLMInput], temperature: float = 0.6, top_p: float = 0.9, - max_gen_len: Optional[int] = None, + 
max_gen_len: int | None = None, logprobs: bool = False, echo: bool = False, print_model_input: bool = False, - logits_processor: Optional[Callable[[torch.Tensor, torch.Tensor], torch.Tensor]] = None, - ) -> Generator[List[GenerationResult], None, None]: + logits_processor: Callable[[torch.Tensor, torch.Tensor], torch.Tensor] | None = None, + ) -> Generator[list[GenerationResult], None, None]: if max_gen_len is None or max_gen_len == 0 or max_gen_len >= self.model.args.max_seq_len: max_gen_len = self.model.args.max_seq_len - 1 @@ -133,9 +133,9 @@ class Llama4: print_model_input = print_model_input or os.environ.get("LLAMA_MODELS_DEBUG", "0") == "1" if print_model_input: - cprint("Input to model:\n", "yellow") + cprint("Input to model:\n", color="yellow", file=sys.stderr) for inp in llm_inputs: - cprint(self.tokenizer.decode(inp.tokens), "grey") + cprint(self.tokenizer.decode(inp.tokens), color="grey", file=sys.stderr) prompt_tokens = [inp.tokens for inp in llm_inputs] bsz = len(llm_inputs) @@ -145,7 +145,7 @@ class Llama4: max_prompt_len = max(len(t) for t in prompt_tokens) if max_prompt_len >= params.max_seq_len: - cprint(f"Out of token budget {max_prompt_len} vs {params.max_seq_len}", "red") + cprint(f"Out of token budget {max_prompt_len} vs {params.max_seq_len}", color="red", file=sys.stderr) return total_len = min(max_gen_len + max_prompt_len, params.max_seq_len) @@ -245,13 +245,13 @@ class Llama4: def completion( self, - contents: List[RawContent], + contents: list[RawContent], temperature: float = 0.6, top_p: float = 0.9, - max_gen_len: Optional[int] = None, + max_gen_len: int | None = None, logprobs: bool = False, echo: bool = False, - ) -> Generator[List[GenerationResult], None, None]: + ) -> Generator[list[GenerationResult], None, None]: llm_inputs = [self.formatter.encode_content(c) for c in contents] for result in self.generate( llm_inputs=llm_inputs, @@ -267,13 +267,13 @@ class Llama4: def chat_completion( self, - messages_batch: List[List[RawMessage]], + messages_batch: list[list[RawMessage]], temperature: float = 0.6, top_p: float = 0.9, - max_gen_len: Optional[int] = None, + max_gen_len: int | None = None, logprobs: bool = False, echo: bool = False, - ) -> Generator[List[GenerationResult], None, None]: + ) -> Generator[list[GenerationResult], None, None]: llm_inputs = [self.formatter.encode_dialog_prompt(messages) for messages in messages_batch] for result in self.generate( llm_inputs=llm_inputs, diff --git a/llama_stack/models/llama/llama4/model.py b/llama_stack/models/llama/llama4/model.py index 2272b868d..4fb1181f7 100644 --- a/llama_stack/models/llama/llama4/model.py +++ b/llama_stack/models/llama/llama4/model.py @@ -5,7 +5,7 @@ # the root directory of this source tree. 
import math -from typing import Any, Dict, List, Optional, Tuple +from typing import Any import fairscale.nn.model_parallel.initialize as fs_init import torch @@ -89,7 +89,7 @@ def apply_rotary_emb( xq: torch.Tensor, xk: torch.Tensor, freqs_cis: torch.Tensor, -) -> Tuple[torch.Tensor, torch.Tensor]: +) -> tuple[torch.Tensor, torch.Tensor]: xq_ = torch.view_as_complex(xq.float().reshape(*xq.shape[:-1], -1, 2)) xk_ = torch.view_as_complex(xk.float().reshape(*xk.shape[:-1], -1, 2)) freqs_cis = reshape_for_broadcast(freqs_cis, xq_) @@ -174,13 +174,13 @@ class Attention(nn.Module): def load_hook( self, - state_dict: Dict[str, Any], + state_dict: dict[str, Any], prefix: str, - local_metadata: Dict[str, Any], + local_metadata: dict[str, Any], strict: bool, - missing_keys: List[str], - unexpected_keys: List[str], - error_msgs: List[str], + missing_keys: list[str], + unexpected_keys: list[str], + error_msgs: list[str], ) -> None: if prefix + "wqkv.weight" in state_dict: wqkv = state_dict.pop(prefix + "wqkv.weight") @@ -200,7 +200,7 @@ class Attention(nn.Module): x: torch.Tensor, start_pos: int, freqs_cis: torch.Tensor, - mask: Optional[torch.Tensor] = None, + mask: torch.Tensor | None = None, ): bsz, seqlen, _ = x.shape xq, xk, xv = self.wq(x), self.wk(x), self.wv(x) @@ -288,13 +288,13 @@ class TransformerBlock(nn.Module): def load_hook( self, - state_dict: Dict[str, Any], + state_dict: dict[str, Any], prefix: str, - local_metadata: Dict[str, Any], + local_metadata: dict[str, Any], strict: bool, - missing_keys: List[str], - unexpected_keys: List[str], - error_msgs: List[str], + missing_keys: list[str], + unexpected_keys: list[str], + error_msgs: list[str], ) -> None: if prefix + "attention.wqkv.layer_norm_weight" in state_dict: state_dict[prefix + "attention_norm.weight"] = state_dict.pop(prefix + "attention.wqkv.layer_norm_weight") @@ -318,8 +318,8 @@ class TransformerBlock(nn.Module): x: torch.Tensor, start_pos: int, freqs_cis: torch.Tensor, - global_attn_mask: Optional[torch.Tensor], - local_attn_mask: Optional[torch.Tensor], + global_attn_mask: torch.Tensor | None, + local_attn_mask: torch.Tensor | None, ): # The iRoPE architecture uses global attention mask for NoPE layers or # if chunked local attention is not used @@ -374,13 +374,13 @@ class Transformer(nn.Module): def load_hook( self, - state_dict: Dict[str, Any], + state_dict: dict[str, Any], prefix: str, - local_metadata: Dict[str, Any], + local_metadata: dict[str, Any], strict: bool, - missing_keys: List[str], - unexpected_keys: List[str], - error_msgs: List[str], + missing_keys: list[str], + unexpected_keys: list[str], + error_msgs: list[str], ) -> None: if prefix + "rope.freqs" in state_dict: state_dict.pop(prefix + "rope.freqs") diff --git a/llama_stack/models/llama/llama4/moe.py b/llama_stack/models/llama/llama4/moe.py index 2ce49e915..7475963d3 100644 --- a/llama_stack/models/llama/llama4/moe.py +++ b/llama_stack/models/llama/llama4/moe.py @@ -6,7 +6,7 @@ # ruff: noqa: N806 # pyre-strict -from typing import Any, Dict, List +from typing import Any import fairscale.nn.model_parallel.initialize as fs_init import torch @@ -63,13 +63,13 @@ class Experts(nn.Module): def load_hook( self, - state_dict: Dict[str, Any], + state_dict: dict[str, Any], prefix: str, - local_metadata: Dict[str, Any], + local_metadata: dict[str, Any], strict: bool, - missing_keys: List[str], - unexpected_keys: List[str], - error_msgs: List[str], + missing_keys: list[str], + unexpected_keys: list[str], + error_msgs: list[str], ) -> None: self.prefix = prefix if 
prefix + "moe_w_in_eD_F" in state_dict: @@ -158,13 +158,13 @@ class MoE(torch.nn.Module): def load_hook( self, - state_dict: Dict[str, Any], + state_dict: dict[str, Any], prefix: str, - local_metadata: Dict[str, Any], + local_metadata: dict[str, Any], strict: bool, - missing_keys: List[str], - unexpected_keys: List[str], - error_msgs: List[str], + missing_keys: list[str], + unexpected_keys: list[str], + error_msgs: list[str], ) -> None: if prefix + "w_in_shared_FD.weight" in state_dict: state_dict[prefix + "shared_expert.w1.weight"] = state_dict.pop(prefix + "w_in_shared_FD.weight") @@ -210,5 +210,5 @@ class MoE(torch.nn.Module): def divide_exact(numerator: int, denominator: int) -> int: - assert numerator % denominator == 0, "{} is not divisible by {}".format(numerator, denominator) + assert numerator % denominator == 0, f"{numerator} is not divisible by {denominator}" return numerator // denominator diff --git a/llama_stack/models/llama/llama4/preprocess.py b/llama_stack/models/llama/llama4/preprocess.py index 689680779..7527a9987 100644 --- a/llama_stack/models/llama/llama4/preprocess.py +++ b/llama_stack/models/llama/llama4/preprocess.py @@ -13,7 +13,6 @@ import math from collections import defaultdict -from typing import Optional, Set, Tuple import torch import torchvision.transforms as tv @@ -52,7 +51,7 @@ class ResizeNormalizeImageTransform: return self.tv_transform(image) -class VariableSizeImageTransform(object): +class VariableSizeImageTransform: """ This class accepts images of any size and dynamically resize, pads and chunks it based on the image aspect ratio and the number of image chunks we allow. @@ -100,7 +99,7 @@ class VariableSizeImageTransform(object): self.resample = tv.InterpolationMode.BILINEAR @staticmethod - def get_factors(n: int) -> Set[int]: + def get_factors(n: int) -> set[int]: """ Calculate all factors of a given number, i.e. a dividor that leaves no remainder. For example, if n=12, it will return {1, 2, 3, 4, 6, 12}. @@ -170,9 +169,9 @@ class VariableSizeImageTransform(object): @staticmethod def get_max_res_without_distortion( - image_size: Tuple[int, int], - target_size: Tuple[int, int], - ) -> Tuple[int, int]: + image_size: tuple[int, int], + target_size: tuple[int, int], + ) -> tuple[int, int]: """ Determines the maximum resolution to which an image can be resized to without distorting its aspect ratio, based on the target resolution. @@ -223,8 +222,8 @@ class VariableSizeImageTransform(object): def resize_without_distortion( self, image: torch.Tensor, - target_size: Tuple[int, int], - max_upscaling_size: Optional[int], + target_size: tuple[int, int], + max_upscaling_size: int | None, ) -> torch.Tensor: """ Used to resize an image to target_resolution, without distortion. @@ -289,10 +288,10 @@ class VariableSizeImageTransform(object): def get_best_fit( self, - image_size: Tuple[int, int], + image_size: tuple[int, int], possible_resolutions: torch.Tensor, resize_to_max_canvas: bool = False, - ) -> Tuple[int, int]: + ) -> tuple[int, int]: """ Determines the best canvas possible from a list of possible resolutions to, without distortion, resize an image to. @@ -392,7 +391,7 @@ class VariableSizeImageTransform(object): max_num_chunks: int, normalize_img: bool = True, resize_to_max_canvas: bool = False, - ) -> Tuple[torch.Tensor, Tuple[int, int]]: + ) -> tuple[torch.Tensor, tuple[int, int]]: """ Args: image (PIL.Image): Image to be resized. 
diff --git a/llama_stack/models/llama/llama4/prompt_format.md b/llama_stack/models/llama/llama4/prompt_format.md index 698571093..7ae998310 100644 --- a/llama_stack/models/llama/llama4/prompt_format.md +++ b/llama_stack/models/llama/llama4/prompt_format.md @@ -64,7 +64,7 @@ This example passes an image that is smaller than the tile size, to show the til ##### Model Response Format ``` -The image depicts a dog standing on a skateboard, with its front paws positioned on the board and its back paws hanging off the back. The dog has a distinctive coat pattern, featuring a white face, brown and black fur, and white paws, and is standing on a skateboard with red wheels, set against a blurred background of a street or alleyway with a teal door and beige wall.<|eot|> +The image depicts a dog standing on a skateboard, positioned centrally and facing the camera directly. The dog has a distinctive coat pattern featuring white, black, and brown fur, with floppy ears and a black nose, and is standing on a skateboard with red wheels.<|eot|> ``` @@ -91,7 +91,7 @@ Here is an example of how to pass an image to the model ##### Model Response Format ``` -This image shows a dog standing on a skateboard, with its front paws positioned near the front of the board and its back paws near the back. The dog has a white, black, and orange coat, and is standing on a gray skateboard with red wheels, in front of a blurred background that appears to be a street or alleyway.<|eot|> +The image depicts a dog standing on a skateboard, with the dog positioned centrally and facing forward. The dog has a distinctive coat featuring a mix of white, brown, and black fur, and is wearing a collar as it stands on the skateboard, which has red wheels.<|eot|> ``` @@ -117,7 +117,7 @@ Here is an example of how to pass an image to the model ##### Model Response Format ``` -The first image shows a dog standing on a skateboard, while the second image shows a plate of spaghetti with tomato sauce, parmesan cheese, and parsley. The two images are unrelated, with the first image featuring a dog and the second image featuring a food dish, and they do not share any common elements or themes.<|eot|> +The first image features a dog standing on a skateboard, while the second image showcases a plate of spaghetti with tomato sauce and cheese. The two images appear to be unrelated, with one depicting a playful scene of a dog on a skateboard and the other presenting a classic Italian dish.<|eom|> ``` @@ -135,25 +135,52 @@ We are continuing the format for zero shot function calling used in previous ver ``` <|begin_of_text|><|header_start|>system<|header_end|> -You are an expert in composing functions. You are given a question and a set of possible functions. -Based on the question, you will need to make one or more function/tool calls to achieve the purpose. -If none of the function can be used, point it out. If the given question lacks the parameters required by the function, -also point it out. You should only return the function call in tools call sections. +You are a helpful assistant and an expert in function composition. You can answer general questions using your internal knowledge OR invoke functions when necessary. Follow these strict guidelines: -If you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)] -You SHOULD NOT include any other text in the response. +1. 
FUNCTION CALLS: +- ONLY use functions that are EXPLICITLY listed in the function list below +- If NO functions are listed (empty function list []), respond ONLY with internal knowledge or "I don't have access to [Unavailable service] information" +- If a function is not in the list, respond ONLY with internal knowledge or "I don't have access to [Unavailable service] information" +- If ALL required parameters are present AND the query EXACTLY matches a listed function's purpose: output ONLY the function call(s) +- Use exact format: [func_name1(param1=value1, param2=value2), func_name2(...)] +Examples: +CORRECT: [get_weather(location="Vancouver"), calculate_route(start="Boston", end="New York")] <- Only if get_weather and calculate_route are in function list +INCORRECT: get_weather(location="New York") +INCORRECT: Let me check the weather: [get_weather(location="New York")] +INCORRECT: [get_events(location="Singapore")] <- If function not in list -Here is a list of functions in JSON format that you can invoke. +2. RESPONSE RULES: +- For pure function requests matching a listed function: ONLY output the function call(s) +- For knowledge questions: ONLY output text +- For missing parameters: ONLY request the specific missing parameters +- For unavailable services (not in function list): output ONLY with internal knowledge or "I don't have access to [Unavailable service] information". Do NOT execute a function call. +- If the query asks for information beyond what a listed function provides: output ONLY with internal knowledge about your limitations +- NEVER combine text and function calls in the same response +- NEVER suggest alternative functions when the requested service is unavailable +- NEVER create or invent new functions not listed below +3. STRICT BOUNDARIES: +- ONLY use functions from the list below - no exceptions +- NEVER use a function as an alternative to unavailable information +- NEVER call functions not present in the function list +- NEVER add explanatory text to function calls +- NEVER respond with empty brackets +- Use proper Python/JSON syntax for function calls +- Check the function list carefully before responding + +4. TOOL RESPONSE HANDLING: +- When receiving tool responses: provide concise, natural language responses +- Don't repeat tool response verbatim +- Don't add supplementary information + +Here is a list of functions in JSON format that you can invoke: [ { "name": "get_weather", "description": "Get weather info for places", "parameters": { "type": "dict", - "required": [ - "city" - ], + "required": ["city"], "properties": { "city": { "type": "string", @@ -167,7 +194,7 @@ Here is a list of functions in JSON format that you can invoke. 
} } } -<|eot|><|header_start|>user<|header_end|> +]<|eot|><|header_start|>user<|header_end|> What is the weather in SF and Seattle?<|eot|><|header_start|>assistant<|header_end|> @@ -176,7 +203,7 @@ What is the weather in SF and Seattle?<|eot|><|header_start|>assistant<|header_e ##### Model Response Format ``` -[get_weather(city='SF'), get_weather(city='Seattle')]<|eot|> +[get_weather(city="San Francisco"), get_weather(city="Seattle")]<|eot|> ``` @@ -273,5 +300,5 @@ Use tools to get latest trending songs<|eot|><|header_start|>assistant<|header_e ##### Model Response Format ``` -{"n": "10"}<|eot|> +{"n": 10}<|eot|> ``` diff --git a/llama_stack/models/llama/llama4/prompt_templates/system_prompts.py b/llama_stack/models/llama/llama4/prompt_templates/system_prompts.py new file mode 100644 index 000000000..9c19f89ae --- /dev/null +++ b/llama_stack/models/llama/llama4/prompt_templates/system_prompts.py @@ -0,0 +1,137 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# top-level folder for each specific model found within the models/ directory at +# the top-level of this source tree. + +import textwrap + +from llama_stack.apis.inference import ToolDefinition, ToolParamDefinition +from llama_stack.models.llama.llama3.prompt_templates.base import ( + PromptTemplate, + PromptTemplateGeneratorBase, +) + + +class PythonListCustomToolGenerator(PromptTemplateGeneratorBase): # noqa: N801 + DEFAULT_PROMPT = textwrap.dedent( + """ + You are a helpful assistant and an expert in function composition. You can answer general questions using your internal knowledge OR invoke functions when necessary. Follow these strict guidelines: + + 1. FUNCTION CALLS: + - ONLY use functions that are EXPLICITLY listed in the function list below + - If NO functions are listed (empty function list []), respond ONLY with internal knowledge or "I don't have access to [Unavailable service] information" + - If a function is not in the list, respond ONLY with internal knowledge or "I don't have access to [Unavailable service] information" + - If ALL required parameters are present AND the query EXACTLY matches a listed function's purpose: output ONLY the function call(s) + - Use exact format: [func_name1(param1=value1, param2=value2), func_name2(...)] + Examples: + CORRECT: [get_weather(location="Vancouver"), calculate_route(start="Boston", end="New York")] <- Only if get_weather and calculate_route are in function list + INCORRECT: get_weather(location="New York") + INCORRECT: Let me check the weather: [get_weather(location="New York")] + INCORRECT: [get_events(location="Singapore")] <- If function not in list + + 2. RESPONSE RULES: + - For pure function requests matching a listed function: ONLY output the function call(s) + - For knowledge questions: ONLY output text + - For missing parameters: ONLY request the specific missing parameters + - For unavailable services (not in function list): output ONLY with internal knowledge or "I don't have access to [Unavailable service] information". Do NOT execute a function call. 
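For reference, the bracketed call format that both the updated `prompt_format.md` example and the `DEFAULT_PROMPT` above mandate (`[func_name1(param1=value1, ...)]`) is plain Python call syntax, so it can be parsed with the standard `ast` module. A small illustrative sketch; the `parse_tool_calls` helper is hypothetical and not part of this change:

```python
import ast

def parse_tool_calls(text: str) -> list[tuple[str, dict]]:
    """Parse output like '[get_weather(city="SF"), get_weather(city="Seattle")]'."""
    expr = ast.parse(text.strip(), mode="eval").body
    calls = []
    for node in expr.elts:  # the model emits a Python list literal of calls
        name = node.func.id
        kwargs = {kw.arg: ast.literal_eval(kw.value) for kw in node.keywords}
        calls.append((name, kwargs))
    return calls

print(parse_tool_calls('[get_weather(city="San Francisco"), get_weather(city="Seattle")]'))
# [('get_weather', {'city': 'San Francisco'}), ('get_weather', {'city': 'Seattle'})]
```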
+ - If the query asks for information beyond what a listed function provides: output ONLY with internal knowledge about your limitations + - NEVER combine text and function calls in the same response + - NEVER suggest alternative functions when the requested service is unavailable + - NEVER create or invent new functions not listed below + + 3. STRICT BOUNDARIES: + - ONLY use functions from the list below - no exceptions + - NEVER use a function as an alternative to unavailable information + - NEVER call functions not present in the function list + - NEVER add explanatory text to function calls + - NEVER respond with empty brackets + - Use proper Python/JSON syntax for function calls + - Check the function list carefully before responding + + 4. TOOL RESPONSE HANDLING: + - When receiving tool responses: provide concise, natural language responses + - Don't repeat tool response verbatim + - Don't add supplementary information + + {{ function_description }} + """.strip("\n") + ) + + def gen(self, custom_tools: list[ToolDefinition], system_prompt: str | None = None) -> PromptTemplate: + system_prompt = system_prompt or self.DEFAULT_PROMPT + return PromptTemplate( + system_prompt, + {"function_description": self._gen_function_description(custom_tools)}, + ) + + def _gen_function_description(self, custom_tools: list[ToolDefinition]) -> PromptTemplate: + template_str = textwrap.dedent( + """ + Here is a list of functions in JSON format that you can invoke: + [ + {% for t in tools -%} + {# manually setting up JSON because jinja sorts keys in unexpected ways -#} + {%- set tname = t.tool_name -%} + {%- set tdesc = t.description -%} + {%- set tparams = t.parameters -%} + {%- set required_params = [] -%} + {%- for name, param in tparams.items() if param.required == true -%} + {%- set _ = required_params.append(name) -%} + {%- endfor -%} + { + "name": "{{tname}}", + "description": "{{tdesc}}", + "parameters": { + "type": "dict", + "required": {{ required_params | tojson }}, + "properties": { + {%- for name, param in tparams.items() %} + "{{name}}": { + "type": "{{param.param_type}}", + "description": "{{param.description}}"{% if param.default %}, + "default": "{{param.default}}"{% endif %} + }{% if not loop.last %},{% endif %} + {%- endfor %} + } + } + }{% if not loop.last %}, + {% endif -%} + {%- endfor %} + ] + """ + ) + return PromptTemplate( + template_str.strip("\n"), + {"tools": [t.model_dump() for t in custom_tools]}, + ).render() + + def data_examples(self) -> list[list[ToolDefinition]]: + return [ + [ + ToolDefinition( + tool_name="get_weather", + description="Get weather info for places", + parameters={ + "city": ToolParamDefinition( + param_type="string", + description="The name of the city to get the weather for", + required=True, + ), + "metric": ToolParamDefinition( + param_type="string", + description="The metric for weather. 
Options are: celsius, fahrenheit", + required=False, + default="celsius", + ), + }, + ), + ] + ] diff --git a/llama_stack/models/llama/llama4/prompts.py b/llama_stack/models/llama/llama4/prompts.py index 13b96359a..2da94db7b 100644 --- a/llama_stack/models/llama/llama4/prompts.py +++ b/llama_stack/models/llama/llama4/prompts.py @@ -7,7 +7,10 @@ import textwrap from io import BytesIO from pathlib import Path -from typing import List + +from llama_stack.models.llama.llama4.prompt_templates.system_prompts import ( + PythonListCustomToolGenerator, +) from ..datatypes import RawMediaItem, RawMessage, RawTextItem from ..prompt_format import ( @@ -19,7 +22,7 @@ from ..prompt_format import ( THIS_DIR = Path(__file__).parent -def usecases(base_model: bool = False) -> List[UseCase | str]: +def usecases(base_model: bool = False) -> list[UseCase | str]: with open(THIS_DIR.parent / "resources/small_dog.jpg", "rb") as f: img_small_dog = f.read() with open(THIS_DIR.parent / "resources/dog.jpg", "rb") as f: @@ -177,39 +180,9 @@ def usecases(base_model: bool = False) -> List[UseCase | str]: [ RawMessage( role="system", - content="""You are an expert in composing functions. You are given a question and a set of possible functions. -Based on the question, you will need to make one or more function/tool calls to achieve the purpose. -If none of the function can be used, point it out. If the given question lacks the parameters required by the function, -also point it out. You should only return the function call in tools call sections. - -If you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)] -You SHOULD NOT include any other text in the response. - -Here is a list of functions in JSON format that you can invoke. - -[ - { - "name": "get_weather", - "description": "Get weather info for places", - "parameters": { - "type": "dict", - "required": [ - "city" - ], - "properties": { - "city": { - "type": "string", - "description": "The name of the city to get the weather for" - }, - "metric": { - "type": "string", - "description": "The metric for weather. 
Options are: celsius, fahrenheit", - "default": "celsius" - } - } - } - } -""", + content=PythonListCustomToolGenerator() + .gen(PythonListCustomToolGenerator().data_examples()[0]) + .render(), ), RawMessage( role="user", diff --git a/llama_stack/models/llama/llama4/quantization/loader.py b/llama_stack/models/llama/llama4/quantization/loader.py index f11d83c60..223744a5f 100644 --- a/llama_stack/models/llama/llama4/quantization/loader.py +++ b/llama_stack/models/llama/llama4/quantization/loader.py @@ -6,7 +6,7 @@ import logging import os -from typing import Callable, Optional +from collections.abc import Callable import torch from fairscale.nn.model_parallel.initialize import get_model_parallel_rank @@ -45,8 +45,8 @@ def experts_batched_swiglu_wrapper( def convert_to_quantized_model( model: Transformer, checkpoint_dir: str, - quantization_mode: Optional[str] = None, - fp8_activation_scale_ub: Optional[float] = 1200.0, + quantization_mode: str | None = None, + fp8_activation_scale_ub: float | None = 1200.0, use_rich_progress: bool = True, ) -> Transformer: from ...quantize_impls import ( @@ -213,7 +213,7 @@ def logging_callbacks( ) task_id = progress.add_task("[blue]Converting layers...", total=total_blocks, status="Starting") - def update_status(message: Optional[str], completed: Optional[int] = None) -> None: + def update_status(message: str | None, completed: int | None = None) -> None: if use_rich_progress: if message is not None: progress.update(task_id, status=message) diff --git a/llama_stack/models/llama/llama4/tokenizer.model b/llama_stack/models/llama/llama4/tokenizer.model old mode 100755 new mode 100644 diff --git a/llama_stack/models/llama/llama4/tokenizer.py b/llama_stack/models/llama/llama4/tokenizer.py index 0d2cc7ce5..74070d43e 100644 --- a/llama_stack/models/llama/llama4/tokenizer.py +++ b/llama_stack/models/llama/llama4/tokenizer.py @@ -5,18 +5,11 @@ # the root directory of this source tree. import os +from collections.abc import Collection, Iterator, Sequence, Set from logging import getLogger from pathlib import Path from typing import ( - AbstractSet, - Collection, - Dict, - Iterator, - List, Literal, - Optional, - Sequence, - Union, cast, ) @@ -114,7 +107,7 @@ class Tokenizer: Tokenizing and encoding/decoding text using the Tiktoken tokenizer. """ - special_tokens: Dict[str, int] + special_tokens: dict[str, int] num_reserved_special_tokens = 2048 @@ -182,9 +175,9 @@ class Tokenizer: *, bos: bool, eos: bool, - allowed_special: Optional[Union[Literal["all"], AbstractSet[str]]] = None, - disallowed_special: Union[Literal["all"], Collection[str]] = (), - ) -> List[int]: + allowed_special: Literal["all"] | Set[str] | None = None, + disallowed_special: Literal["all"] | Collection[str] = (), + ) -> list[int]: """ Encodes a string into a list of token IDs. @@ -217,7 +210,7 @@ class Tokenizer: s[i : i + TIKTOKEN_MAX_ENCODE_CHARS], MAX_NO_WHITESPACES_CHARS ) ) - t: List[int] = [] + t: list[int] = [] for substr in substrs: t.extend( self.model.encode( @@ -243,7 +236,7 @@ class Tokenizer: str: The decoded string. """ # Typecast is safe here. Tiktoken doesn't do anything list-related with the sequence. 
- return self.model.decode(cast(List[int], t)) + return self.model.decode(cast(list[int], t)) @staticmethod def _split_whitespaces_or_nonwhitespaces(s: str, max_consecutive_slice_len: int) -> Iterator[str]: diff --git a/llama_stack/models/llama/llama4/vision/embedding.py b/llama_stack/models/llama/llama4/vision/embedding.py index ed7659a73..c7dd81965 100644 --- a/llama_stack/models/llama/llama4/vision/embedding.py +++ b/llama_stack/models/llama/llama4/vision/embedding.py @@ -5,7 +5,8 @@ # the root directory of this source tree. import math -from typing import Any, Callable, Dict, List +from collections.abc import Callable +from typing import Any import torch import torch.nn as nn @@ -136,13 +137,13 @@ class VisionEmbeddings(torch.nn.Module): def load_hook( self, - state_dict: Dict[str, Any], + state_dict: dict[str, Any], prefix: str, - local_metadata: Dict[str, Any], + local_metadata: dict[str, Any], strict: bool = True, - missing_keys: List[str] = None, - unexpected_keys: List[str] = None, - error_msgs: List[str] = None, + missing_keys: list[str] = None, + unexpected_keys: list[str] = None, + error_msgs: list[str] = None, return_state_dict: bool = False, ) -> None: original_sd = self.state_dict() @@ -163,7 +164,7 @@ class VisionEmbeddings(torch.nn.Module): # each image is a tensor of shape [num_tiles, C, H, W] def forward( self, - image_batch: List[List[torch.Tensor]], + image_batch: list[list[torch.Tensor]], image_mask: torch.Tensor, h_ref: torch.Tensor, ) -> torch.Tensor: diff --git a/llama_stack/models/llama/llama4/vision/encoder.py b/llama_stack/models/llama/llama4/vision/encoder.py index 4baf03d8d..4b66f1411 100644 --- a/llama_stack/models/llama/llama4/vision/encoder.py +++ b/llama_stack/models/llama/llama4/vision/encoder.py @@ -4,7 +4,8 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from typing import Any, Callable, Dict, List, Optional, Tuple, Union +from collections.abc import Callable +from typing import Any import fairscale.nn.model_parallel.initialize as fs_init import torch @@ -42,9 +43,9 @@ class ColumnParallelConv2dPatch(torch.nn.Module): self, in_channels: int, out_channels: int, - kernel_size: Union[int, Tuple[int, int]], - stride: Union[int, Tuple[int, int]], - bias: Optional[bool] = False, + kernel_size: int | tuple[int, int], + stride: int | tuple[int, int], + bias: bool | None = False, ) -> None: super().__init__() if isinstance(kernel_size, int): @@ -134,15 +135,15 @@ class _TransformerBlock(nn.Module): def attention( self, x: torch.Tensor, - freq_cis: Optional[torch.Tensor] = None, + freq_cis: torch.Tensor | None = None, ): return self.attn(x=x, start_pos=0, freqs_cis=freq_cis) def forward( self, x: torch.Tensor, - mask: Optional[torch.Tensor] = None, - freq_cis: Optional[torch.Tensor] = None, + mask: torch.Tensor | None = None, + freq_cis: torch.Tensor | None = None, ): _gate_attn = 1 if not self.gated else self.gate_attn.tanh() _gate_ffn = 1 if not self.gated else self.gate_ffn.tanh() @@ -210,8 +211,8 @@ class PackingIndex: class VisionEncoder(nn.Module): def __init__( self, - image_size: Tuple[int, int], - patch_size: Tuple[int, int], + image_size: tuple[int, int], + patch_size: tuple[int, int], dim: int, layers: int, heads: int, @@ -299,13 +300,13 @@ class VisionEncoder(nn.Module): def load_hook( self, - state_dict: Dict[str, Any], + state_dict: dict[str, Any], prefix: str, - local_metadata: Dict[str, Any], + local_metadata: dict[str, Any], strict: bool = True, - missing_keys: List[str] = None, - unexpected_keys: List[str] = None, - error_msgs: List[str] = None, + missing_keys: list[str] = None, + unexpected_keys: list[str] = None, + error_msgs: list[str] = None, return_state_dict: bool = False, ) -> None: orig_pos_embed = state_dict.get(prefix + "positional_embedding") diff --git a/llama_stack/models/llama/prompt_format.py b/llama_stack/models/llama/prompt_format.py index edb34620c..6191df61a 100644 --- a/llama_stack/models/llama/prompt_format.py +++ b/llama_stack/models/llama/prompt_format.py @@ -14,7 +14,6 @@ import json import textwrap from pathlib import Path -from typing import List from pydantic import BaseModel, Field @@ -44,7 +43,7 @@ class TextCompletionContent(BaseModel): class UseCase(BaseModel): title: str = "" description: str = "" - dialogs: List[List[RawMessage] | TextCompletionContent | str] = Field(default_factory=list) + dialogs: list[list[RawMessage] | TextCompletionContent | str] = Field(default_factory=list) notes: str = "" tool_prompt_format: ToolPromptFormat = ToolPromptFormat.json max_gen_len: int = 512 diff --git a/llama_stack/models/llama/quantize_impls.py b/llama_stack/models/llama/quantize_impls.py index a5da01588..a6400c5c9 100644 --- a/llama_stack/models/llama/quantize_impls.py +++ b/llama_stack/models/llama/quantize_impls.py @@ -7,7 +7,6 @@ # type: ignore import collections import logging -from typing import Optional, Tuple, Type, Union log = logging.getLogger(__name__) @@ -27,7 +26,7 @@ class Fp8ScaledWeights: # TODO: Ugly trick so torch allows us to replace parameters # with our custom Fp8Weights instance. Do this properly. @property - def __class__(self) -> Type[nn.parameter.Parameter]: + def __class__(self) -> type[nn.parameter.Parameter]: return nn.Parameter @property @@ -51,7 +50,7 @@ class Int4ScaledWeights: # TODO: Ugly trick so torch allows us to replace parameters # with our custom Int4Weights instance. 
Do this properly. @property - def __class__(self) -> Type[nn.parameter.Parameter]: + def __class__(self) -> type[nn.parameter.Parameter]: return nn.Parameter @property @@ -74,7 +73,7 @@ class Int4Weights( def int4_row_quantize( x: torch.Tensor, group_size: int = 128, -) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: +) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]: n_bit = 4 # Number of target bits. to_quant = x.reshape(-1, group_size).to(torch.float) @@ -115,8 +114,8 @@ def pack_int4(x: torch.Tensor) -> torch.Tensor: def bmm_nt( x: Tensor, - w: Union[Fp8RowwiseWeights, Int4Weights], - num_tokens: Optional[Tensor] = None, + w: Fp8RowwiseWeights | Int4Weights, + num_tokens: Tensor | None = None, ) -> Tensor: if isinstance(w, Fp8ScaledWeights): xq, x_scale = torch.ops.fbgemm.quantize_fp8_per_row(x, num_tokens, w.activation_scale_ub) @@ -129,10 +128,10 @@ def bmm_nt( def ffn_swiglu( x: Tensor, - w1: Union[Fp8RowwiseWeights, Int4Weights], - w3: Union[Fp8RowwiseWeights, Int4Weights], - w2: Union[Fp8RowwiseWeights, Int4Weights], - num_tokens: Optional[Tensor] = None, + w1: Fp8RowwiseWeights | Int4Weights, + w3: Fp8RowwiseWeights | Int4Weights, + w2: Fp8RowwiseWeights | Int4Weights, + num_tokens: Tensor | None = None, is_memory_bounded: bool = False, ) -> Tensor: if (isinstance(w1, Fp8ScaledWeights) and isinstance(w3, Fp8ScaledWeights) and isinstance(w2, Fp8ScaledWeights)) or ( @@ -158,7 +157,7 @@ def ffn_swiglu( def quantize_fp8( w: Tensor, fp8_activation_scale_ub: float, - output_device: Optional[torch.device] = None, + output_device: torch.device | None = None, ) -> Fp8RowwiseWeights: """Quantize [n, k] weight tensor. @@ -184,7 +183,7 @@ def quantize_fp8( @torch.inference_mode() def quantize_int4( w: Tensor, - output_device: Optional[torch.device] = None, + output_device: torch.device | None = None, ) -> Int4Weights: """Quantize [n, k/2] weight tensor. @@ -213,7 +212,7 @@ def load_fp8( w: Tensor, w_scale: Tensor, fp8_activation_scale_ub: float, - output_device: Optional[torch.device] = None, + output_device: torch.device | None = None, ) -> Fp8RowwiseWeights: """Load FP8 [n, k] weight tensor. @@ -239,7 +238,7 @@ def load_int4( w: Tensor, scale: Tensor, zero_point: Tensor, - output_device: Optional[torch.device] = None, + output_device: torch.device | None = None, ) -> Int4Weights: """Load INT4 [n, k/2] weight tensor. 
@@ -256,9 +255,9 @@ def load_int4( def fc_dynamic( x: Tensor, - w: Union[Fp8RowwiseWeights, Int4Weights], - activation_scale_ub: Optional[Tensor] = None, - num_tokens: Optional[Tensor] = None, + w: Fp8RowwiseWeights | Int4Weights, + activation_scale_ub: Tensor | None = None, + num_tokens: Tensor | None = None, is_memory_bounded: bool = False, ) -> Tensor: """ @@ -275,11 +274,11 @@ def fc_dynamic( def ffn_swiglu_dynamic( x: Tensor, - w1: Union[Fp8RowwiseWeights, Int4Weights], - w3: Union[Fp8RowwiseWeights, Int4Weights], - w2: Union[Fp8RowwiseWeights, Int4Weights], - activation_scale_ub: Optional[Tensor] = None, - num_tokens: Optional[Tensor] = None, + w1: Fp8RowwiseWeights | Int4Weights, + w3: Fp8RowwiseWeights | Int4Weights, + w2: Fp8RowwiseWeights | Int4Weights, + activation_scale_ub: Tensor | None = None, + num_tokens: Tensor | None = None, is_memory_bounded: bool = False, ) -> Tensor: assert x.dim() == 3 or x.dim() == 2 diff --git a/llama_stack/models/llama/sku_list.py b/llama_stack/models/llama/sku_list.py index 513481831..271cec63f 100644 --- a/llama_stack/models/llama/sku_list.py +++ b/llama_stack/models/llama/sku_list.py @@ -6,7 +6,6 @@ from dataclasses import dataclass from functools import lru_cache -from typing import List, Optional from .sku_types import ( CheckpointQuantizationFormat, @@ -19,14 +18,14 @@ LLAMA2_VOCAB_SIZE = 32000 LLAMA3_VOCAB_SIZE = 128256 -def resolve_model(descriptor: str) -> Optional[Model]: +def resolve_model(descriptor: str) -> Model | None: for m in all_registered_models(): if descriptor in (m.descriptor(), m.huggingface_repo): return m return None -def all_registered_models() -> List[Model]: +def all_registered_models() -> list[Model]: return ( llama2_family() + llama3_family() @@ -38,48 +37,48 @@ def all_registered_models() -> List[Model]: ) -def llama2_family() -> List[Model]: +def llama2_family() -> list[Model]: return [ *llama2_base_models(), *llama2_instruct_models(), ] -def llama3_family() -> List[Model]: +def llama3_family() -> list[Model]: return [ *llama3_base_models(), *llama3_instruct_models(), ] -def llama3_1_family() -> List[Model]: +def llama3_1_family() -> list[Model]: return [ *llama3_1_base_models(), *llama3_1_instruct_models(), ] -def llama3_2_family() -> List[Model]: +def llama3_2_family() -> list[Model]: return [ *llama3_2_base_models(), *llama3_2_instruct_models(), ] -def llama3_3_family() -> List[Model]: +def llama3_3_family() -> list[Model]: return [ *llama3_3_instruct_models(), ] -def llama4_family() -> List[Model]: +def llama4_family() -> list[Model]: return [ *llama4_base_models(), *llama4_instruct_models(), ] -def llama4_base_models() -> List[Model]: +def llama4_base_models() -> list[Model]: return [ Model( core_model_id=CoreModelId.llama4_scout_17b_16e, @@ -98,7 +97,7 @@ def llama4_base_models() -> List[Model]: ] -def llama4_instruct_models() -> List[Model]: +def llama4_instruct_models() -> list[Model]: return [ Model( core_model_id=CoreModelId.llama4_scout_17b_16e_instruct, @@ -126,7 +125,7 @@ def llama4_instruct_models() -> List[Model]: ] -def llama2_base_models() -> List[Model]: +def llama2_base_models() -> list[Model]: return [ Model( core_model_id=CoreModelId.llama2_7b, @@ -185,7 +184,7 @@ def llama2_base_models() -> List[Model]: ] -def llama3_base_models() -> List[Model]: +def llama3_base_models() -> list[Model]: return [ Model( core_model_id=CoreModelId.llama3_8b, @@ -226,7 +225,7 @@ def llama3_base_models() -> List[Model]: ] -def llama3_1_base_models() -> List[Model]: +def llama3_1_base_models() -> list[Model]: return 
[ Model( core_model_id=CoreModelId.llama3_1_8b, @@ -324,7 +323,7 @@ def llama3_1_base_models() -> List[Model]: ] -def llama3_2_base_models() -> List[Model]: +def llama3_2_base_models() -> list[Model]: return [ Model( core_model_id=CoreModelId.llama3_2_1b, @@ -407,7 +406,7 @@ def llama3_2_base_models() -> List[Model]: ] -def llama2_instruct_models() -> List[Model]: +def llama2_instruct_models() -> list[Model]: return [ Model( core_model_id=CoreModelId.llama2_7b_chat, @@ -466,7 +465,7 @@ def llama2_instruct_models() -> List[Model]: ] -def llama3_instruct_models() -> List[Model]: +def llama3_instruct_models() -> list[Model]: return [ Model( core_model_id=CoreModelId.llama3_8b_instruct, @@ -507,7 +506,7 @@ def llama3_instruct_models() -> List[Model]: ] -def llama3_1_instruct_models() -> List[Model]: +def llama3_1_instruct_models() -> list[Model]: return [ Model( core_model_id=CoreModelId.llama3_1_8b_instruct, @@ -635,7 +634,7 @@ def arch_args_3b() -> dict: } -def llama3_2_quantized_models() -> List[Model]: +def llama3_2_quantized_models() -> list[Model]: return [ Model( core_model_id=CoreModelId.llama3_2_1b_instruct, @@ -704,7 +703,7 @@ def llama3_2_quantized_models() -> List[Model]: ] -def llama3_2_instruct_models() -> List[Model]: +def llama3_2_instruct_models() -> list[Model]: return [ Model( core_model_id=CoreModelId.llama3_2_1b_instruct, @@ -766,7 +765,7 @@ def llama3_2_instruct_models() -> List[Model]: ] -def llama3_3_instruct_models() -> List[Model]: +def llama3_3_instruct_models() -> list[Model]: return [ Model( core_model_id=CoreModelId.llama3_3_70b_instruct, @@ -790,8 +789,15 @@ def llama3_3_instruct_models() -> List[Model]: @lru_cache -def safety_models() -> List[Model]: +def safety_models() -> list[Model]: return [ + Model( + core_model_id=CoreModelId.llama_guard_4_12b, + description="Llama Guard v4 12b system safety model", + huggingface_repo="meta-llama/Llama-Guard-4-12B", + arch_args={}, + pth_file_count=1, + ), Model( core_model_id=CoreModelId.llama_guard_3_11b_vision, description="Llama Guard v3 11b vision system safety model", @@ -912,7 +918,7 @@ def safety_models() -> List[Model]: @dataclass class LlamaDownloadInfo: folder: str - files: List[str] + files: list[str] pth_size: int @@ -942,6 +948,8 @@ def llama_meta_net_info(model: Model) -> LlamaDownloadInfo: elif model.core_model_id == CoreModelId.llama_guard_2_8b: folder = "llama-guard-2" else: + if model.huggingface_repo is None: + raise ValueError(f"Model {model.core_model_id} has no huggingface_repo set") folder = model.huggingface_repo.split("/")[-1] if "Llama-2" in folder: folder = folder.lower() @@ -1018,3 +1026,4 @@ def llama_meta_pth_size(model: Model) -> int: return 54121549657 else: return 100426653046 + return 0 diff --git a/llama_stack/models/llama/sku_types.py b/llama_stack/models/llama/sku_types.py index 88799b66d..4147707d5 100644 --- a/llama_stack/models/llama/sku_types.py +++ b/llama_stack/models/llama/sku_types.py @@ -5,7 +5,7 @@ # the root directory of this source tree. 
from enum import Enum -from typing import Any, Dict, Optional +from typing import Any from pydantic import BaseModel, ConfigDict, Field @@ -81,6 +81,7 @@ class CoreModelId(Enum): llama_guard_2_8b = "Llama-Guard-2-8B" llama_guard_3_11b_vision = "Llama-Guard-3-11B-Vision" llama_guard_3_1b = "Llama-Guard-3-1B" + llama_guard_4_12b = "Llama-Guard-4-12B" def is_multimodal(model_id) -> bool: @@ -148,6 +149,7 @@ def model_family(model_id) -> ModelFamily: CoreModelId.llama_guard_2_8b, CoreModelId.llama_guard_3_11b_vision, CoreModelId.llama_guard_3_1b, + CoreModelId.llama_guard_4_12b, ]: return ModelFamily.safety else: @@ -157,13 +159,13 @@ def model_family(model_id) -> ModelFamily: class Model(BaseModel): core_model_id: CoreModelId description: str - huggingface_repo: Optional[str] = None - arch_args: Dict[str, Any] + huggingface_repo: str | None = None + arch_args: dict[str, Any] variant: str = "" quantization_format: CheckpointQuantizationFormat = CheckpointQuantizationFormat.bf16 pth_file_count: int - metadata: Dict[str, Any] = Field(default_factory=dict) + metadata: dict[str, Any] = Field(default_factory=dict) # silence pydantic until we remove the `model_` fields model_config = ConfigDict(protected_namespaces=()) @@ -225,5 +227,7 @@ class Model(BaseModel): CoreModelId.llama_guard_3_1b, ]: return 131072 + elif self.core_model_id == CoreModelId.llama_guard_4_12b: + return 8192 else: raise ValueError(f"Unknown max_seq_len for {self.core_model_id}") diff --git a/llama_stack/providers/datatypes.py b/llama_stack/providers/datatypes.py index 32dfba30c..60b05545b 100644 --- a/llama_stack/providers/datatypes.py +++ b/llama_stack/providers/datatypes.py @@ -4,7 +4,8 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, List, Optional, Protocol +from enum import Enum +from typing import Any, Protocol from urllib.parse import urlparse from pydantic import BaseModel, Field @@ -15,12 +16,33 @@ from llama_stack.apis.datatypes import Api from llama_stack.apis.models import Model from llama_stack.apis.scoring_functions import ScoringFn from llama_stack.apis.shields import Shield -from llama_stack.apis.tools import Tool +from llama_stack.apis.tools import ToolGroup from llama_stack.apis.vector_dbs import VectorDB from llama_stack.schema_utils import json_schema_type class ModelsProtocolPrivate(Protocol): + """ + Protocol for model management. + + This allows users to register their preferred model identifiers. + + Model registration requires - + - a provider, used to route the registration request + - a model identifier, user's intended name for the model during inference + - a provider model identifier, a model identifier supported by the provider + + Providers will only accept registration for provider model ids they support. + + Example, + register: provider x my-model-id x provider-model-id + -> Error if provider does not support provider-model-id + -> Error if my-model-id is already registered + -> Success if provider supports provider-model-id + inference: my-model-id x ... + -> Provider uses provider-model-id for inference + """ + async def register_model(self, model: Model) -> Model: ... async def unregister_model(self, model_id: str) -> None: ... @@ -43,7 +65,7 @@ class DatasetsProtocolPrivate(Protocol): class ScoringFunctionsProtocolPrivate(Protocol): - async def list_scoring_functions(self) -> List[ScoringFn]: ... + async def list_scoring_functions(self) -> list[ScoringFn]: ... 
async def register_scoring_function(self, scoring_fn: ScoringFn) -> None: ... @@ -52,10 +74,10 @@ class BenchmarksProtocolPrivate(Protocol): async def register_benchmark(self, benchmark: Benchmark) -> None: ... -class ToolsProtocolPrivate(Protocol): - async def register_tool(self, tool: Tool) -> None: ... +class ToolGroupsProtocolPrivate(Protocol): + async def register_toolgroup(self, toolgroup: ToolGroup) -> None: ... - async def unregister_tool(self, tool_id: str) -> None: ... + async def unregister_toolgroup(self, toolgroup_id: str) -> None: ... @json_schema_type @@ -66,24 +88,24 @@ class ProviderSpec(BaseModel): ..., description="Fully-qualified classname of the config for this provider", ) - api_dependencies: List[Api] = Field( + api_dependencies: list[Api] = Field( default_factory=list, description="Higher-level API surfaces may depend on other providers to provide their functionality", ) - optional_api_dependencies: List[Api] = Field( + optional_api_dependencies: list[Api] = Field( default_factory=list, ) - deprecation_warning: Optional[str] = Field( + deprecation_warning: str | None = Field( default=None, description="If this provider is deprecated, specify the warning message here", ) - deprecation_error: Optional[str] = Field( + deprecation_error: str | None = Field( default=None, description="If this provider is deprecated and does NOT work, specify the error message here", ) # used internally by the resolver; this is a hack for now - deps__: List[str] = Field(default_factory=list) + deps__: list[str] = Field(default_factory=list) @property def is_sample(self) -> bool: @@ -109,25 +131,25 @@ Fully-qualified name of the module to import. The module is expected to have: - `get_adapter_impl(config, deps)`: returns the adapter implementation """, ) - pip_packages: List[str] = Field( + pip_packages: list[str] = Field( default_factory=list, description="The pip dependencies needed for this implementation", ) config_class: str = Field( description="Fully-qualified classname of the config for this provider", ) - provider_data_validator: Optional[str] = Field( + provider_data_validator: str | None = Field( default=None, ) @json_schema_type class InlineProviderSpec(ProviderSpec): - pip_packages: List[str] = Field( + pip_packages: list[str] = Field( default_factory=list, description="The pip dependencies needed for this implementation", ) - container_image: Optional[str] = Field( + container_image: str | None = Field( default=None, description=""" The container image to use for this implementation. If one is provided, pip_packages will be ignored. @@ -142,14 +164,14 @@ Fully-qualified name of the module to import. The module is expected to have: - `get_provider_impl(config, deps)`: returns the local implementation """, ) - provider_data_validator: Optional[str] = Field( + provider_data_validator: str | None = Field( default=None, ) class RemoteProviderConfig(BaseModel): host: str = "localhost" - port: Optional[int] = None + port: int | None = None protocol: str = "http" @property @@ -175,7 +197,7 @@ API responses, specify the adapter here. ) @property - def container_image(self) -> Optional[str]: + def container_image(self) -> str | None: return None @property @@ -183,16 +205,16 @@ API responses, specify the adapter here. 
return self.adapter.module @property - def pip_packages(self) -> List[str]: + def pip_packages(self) -> list[str]: return self.adapter.pip_packages @property - def provider_data_validator(self) -> Optional[str]: + def provider_data_validator(self) -> str | None: return self.adapter.provider_data_validator def remote_provider_spec( - api: Api, adapter: AdapterSpec, api_dependencies: Optional[List[Api]] = None + api: Api, adapter: AdapterSpec, api_dependencies: list[Api] | None = None ) -> RemoteProviderSpec: return RemoteProviderSpec( api=api, @@ -201,3 +223,12 @@ def remote_provider_spec( adapter=adapter, api_dependencies=api_dependencies or [], ) + + +class HealthStatus(str, Enum): + OK = "OK" + ERROR = "Error" + NOT_IMPLEMENTED = "Not Implemented" + + +HealthResponse = dict[str, Any] diff --git a/llama_stack/providers/inline/agents/meta_reference/__init__.py b/llama_stack/providers/inline/agents/meta_reference/__init__.py index 4be064f1d..7503b8c90 100644 --- a/llama_stack/providers/inline/agents/meta_reference/__init__.py +++ b/llama_stack/providers/inline/agents/meta_reference/__init__.py @@ -4,14 +4,14 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict +from typing import Any from llama_stack.distribution.datatypes import Api from .config import MetaReferenceAgentsImplConfig -async def get_provider_impl(config: MetaReferenceAgentsImplConfig, deps: Dict[Api, Any]): +async def get_provider_impl(config: MetaReferenceAgentsImplConfig, deps: dict[Api, Any]): from .agents import MetaReferenceAgentsImpl impl = MetaReferenceAgentsImpl( diff --git a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py index f441d6eb6..2e387e7e8 100644 --- a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py +++ b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py @@ -10,8 +10,8 @@ import re import secrets import string import uuid +from collections.abc import AsyncGenerator from datetime import datetime, timezone -from typing import AsyncGenerator, List, Optional, Union import httpx @@ -95,6 +95,7 @@ class ChatAgent(ShieldRunnerMixin): tool_groups_api: ToolGroups, vector_io_api: VectorIO, persistence_store: KVStore, + created_at: str, ): self.agent_id = agent_id self.agent_config = agent_config @@ -104,6 +105,7 @@ class ChatAgent(ShieldRunnerMixin): self.storage = AgentPersistence(agent_id, persistence_store) self.tool_runtime_api = tool_runtime_api self.tool_groups_api = tool_groups_api + self.created_at = created_at ShieldRunnerMixin.__init__( self, @@ -112,7 +114,7 @@ class ChatAgent(ShieldRunnerMixin): output_shields=agent_config.output_shields, ) - def turn_to_messages(self, turn: Turn) -> List[Message]: + def turn_to_messages(self, turn: Turn) -> list[Message]: messages = [] # NOTE: if a toolcall response is in a step, we do not add it when processing the input messages @@ -161,7 +163,7 @@ class ChatAgent(ShieldRunnerMixin): async def create_session(self, name: str) -> str: return await self.storage.create_session(name) - async def get_messages_from_turns(self, turns: List[Turn]) -> List[Message]: + async def get_messages_from_turns(self, turns: list[Turn]) -> list[Message]: messages = [] if self.agent_config.instructions != "": messages.append(SystemMessage(content=self.agent_config.instructions)) @@ -178,6 +180,8 @@ class ChatAgent(ShieldRunnerMixin): 
span.set_attribute("request", request.model_dump_json()) turn_id = str(uuid.uuid4()) span.set_attribute("turn_id", turn_id) + if self.agent_config.name: + span.set_attribute("agent_name", self.agent_config.name) await self._initialize_tools(request.toolgroups) async for chunk in self._run_turn(request, turn_id): @@ -190,6 +194,8 @@ class ChatAgent(ShieldRunnerMixin): span.set_attribute("session_id", request.session_id) span.set_attribute("request", request.model_dump_json()) span.set_attribute("turn_id", request.turn_id) + if self.agent_config.name: + span.set_attribute("agent_name", self.agent_config.name) await self._initialize_tools() async for chunk in self._run_turn(request): @@ -197,8 +203,8 @@ class ChatAgent(ShieldRunnerMixin): async def _run_turn( self, - request: Union[AgentTurnCreateRequest, AgentTurnResumeRequest], - turn_id: Optional[str] = None, + request: AgentTurnCreateRequest | AgentTurnResumeRequest, + turn_id: str | None = None, ) -> AsyncGenerator: assert request.stream is True, "Non-streaming not supported" @@ -317,10 +323,10 @@ class ChatAgent(ShieldRunnerMixin): self, session_id: str, turn_id: str, - input_messages: List[Message], + input_messages: list[Message], sampling_params: SamplingParams, stream: bool = False, - documents: Optional[List[Document]] = None, + documents: list[Document] | None = None, ) -> AsyncGenerator: # Doing async generators makes downstream code much simpler and everything amenable to # streaming. However, it also makes things complicated here because AsyncGenerators cannot @@ -370,8 +376,8 @@ class ChatAgent(ShieldRunnerMixin): async def run_multiple_shields_wrapper( self, turn_id: str, - messages: List[Message], - shields: List[str], + messages: list[Message], + shields: list[str], touchpoint: str, ) -> AsyncGenerator: async with tracing.span("run_shields") as span: @@ -439,10 +445,10 @@ class ChatAgent(ShieldRunnerMixin): self, session_id: str, turn_id: str, - input_messages: List[Message], + input_messages: list[Message], sampling_params: SamplingParams, stream: bool = False, - documents: Optional[List[Document]] = None, + documents: list[Document] | None = None, ) -> AsyncGenerator: # if document is passed in a turn, we parse the raw text of the document # and sent it as a user message @@ -498,6 +504,8 @@ class ChatAgent(ShieldRunnerMixin): stop_reason = None async with tracing.span("inference") as span: + if self.agent_config.name: + span.set_attribute("agent_name", self.agent_config.name) async for chunk in await self.inference_api.chat_completion( self.agent_config.model, input_messages, @@ -754,7 +762,7 @@ class ChatAgent(ShieldRunnerMixin): async def _initialize_tools( self, - toolgroups_for_turn: Optional[List[AgentToolGroup]] = None, + toolgroups_for_turn: list[AgentToolGroup] | None = None, ) -> None: toolgroup_to_args = {} for toolgroup in (self.agent_config.toolgroups or []) + (toolgroups_for_turn or []): @@ -841,7 +849,7 @@ class ChatAgent(ShieldRunnerMixin): tool_name_to_args, ) - def _parse_toolgroup_name(self, toolgroup_name_with_maybe_tool_name: str) -> tuple[str, Optional[str]]: + def _parse_toolgroup_name(self, toolgroup_name_with_maybe_tool_name: str) -> tuple[str, str | None]: """Parse a toolgroup name into its components. 
Args: @@ -915,7 +923,7 @@ async def get_raw_document_text(document: Document) -> str: def _interpret_content_as_attachment( content: str, -) -> Optional[Attachment]: +) -> Attachment | None: match = re.search(TOOLS_ATTACHMENT_KEY_REGEX, content) if match: snippet = match.group(1) diff --git a/llama_stack/providers/inline/agents/meta_reference/agents.py b/llama_stack/providers/inline/agents/meta_reference/agents.py index 656178773..bcbfcbe31 100644 --- a/llama_stack/providers/inline/agents/meta_reference/agents.py +++ b/llama_stack/providers/inline/agents/meta_reference/agents.py @@ -4,11 +4,10 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import json import logging -import shutil import uuid -from typing import AsyncGenerator, List, Optional, Union +from collections.abc import AsyncGenerator +from datetime import datetime, timezone from llama_stack.apis.agents import ( Agent, @@ -21,11 +20,16 @@ from llama_stack.apis.agents import ( AgentTurnCreateRequest, AgentTurnResumeRequest, Document, - ListAgentSessionsResponse, - ListAgentsResponse, + ListOpenAIResponseInputItem, + ListOpenAIResponseObject, + OpenAIResponseInput, + OpenAIResponseInputTool, + OpenAIResponseObject, + Order, Session, Turn, ) +from llama_stack.apis.common.responses import PaginatedResponse from llama_stack.apis.inference import ( Inference, ToolConfig, @@ -37,12 +41,15 @@ from llama_stack.apis.safety import Safety from llama_stack.apis.tools import ToolGroups, ToolRuntime from llama_stack.apis.vector_io import VectorIO from llama_stack.providers.utils.kvstore import InmemoryKVStoreImpl, kvstore_impl +from llama_stack.providers.utils.pagination import paginate_records +from llama_stack.providers.utils.responses.responses_store import ResponsesStore from .agent_instance import ChatAgent from .config import MetaReferenceAgentsImplConfig +from .openai_responses import OpenAIResponsesImpl +from .persistence import AgentInfo logger = logging.getLogger() -logger.setLevel(logging.INFO) class MetaReferenceAgentsImpl(Agents): @@ -63,56 +70,65 @@ class MetaReferenceAgentsImpl(Agents): self.tool_groups_api = tool_groups_api self.in_memory_store = InmemoryKVStoreImpl() + self.openai_responses_impl: OpenAIResponsesImpl | None = None async def initialize(self) -> None: self.persistence_store = await kvstore_impl(self.config.persistence_store) - - # check if "bwrap" is available - if not shutil.which("bwrap"): - logger.warning("Warning: `bwrap` is not available. 
Code interpreter tool will not work correctly.") + self.responses_store = ResponsesStore(self.config.responses_store) + await self.responses_store.initialize() + self.openai_responses_impl = OpenAIResponsesImpl( + inference_api=self.inference_api, + tool_groups_api=self.tool_groups_api, + tool_runtime_api=self.tool_runtime_api, + responses_store=self.responses_store, + ) async def create_agent( self, agent_config: AgentConfig, ) -> AgentCreateResponse: agent_id = str(uuid.uuid4()) + created_at = datetime.now(timezone.utc) + agent_info = AgentInfo( + **agent_config.model_dump(), + created_at=created_at, + ) + + # Store the agent info await self.persistence_store.set( key=f"agent:{agent_id}", - value=agent_config.model_dump_json(), + value=agent_info.model_dump_json(), ) + return AgentCreateResponse( agent_id=agent_id, ) async def _get_agent_impl(self, agent_id: str) -> ChatAgent: - agent_config = await self.persistence_store.get( + agent_info_json = await self.persistence_store.get( key=f"agent:{agent_id}", ) - if not agent_config: - raise ValueError(f"Could not find agent config for {agent_id}") + if not agent_info_json: + raise ValueError(f"Could not find agent info for {agent_id}") try: - agent_config = json.loads(agent_config) - except json.JSONDecodeError as e: - raise ValueError(f"Could not JSON decode agent config for {agent_id}") from e - - try: - agent_config = AgentConfig(**agent_config) + agent_info = AgentInfo.model_validate_json(agent_info_json) except Exception as e: - raise ValueError(f"Could not validate(?) agent config for {agent_id}") from e + raise ValueError(f"Could not validate agent info for {agent_id}") from e return ChatAgent( agent_id=agent_id, - agent_config=agent_config, + agent_config=agent_info, inference_api=self.inference_api, safety_api=self.safety_api, vector_io_api=self.vector_io_api, tool_runtime_api=self.tool_runtime_api, tool_groups_api=self.tool_groups_api, persistence_store=( - self.persistence_store if agent_config.enable_session_persistence else self.in_memory_store + self.persistence_store if agent_info.enable_session_persistence else self.in_memory_store ), + created_at=agent_info.created_at, ) async def create_agent_session( @@ -131,16 +147,11 @@ class MetaReferenceAgentsImpl(Agents): self, agent_id: str, session_id: str, - messages: List[ - Union[ - UserMessage, - ToolResponseMessage, - ] - ], - toolgroups: Optional[List[AgentToolGroup]] = None, - documents: Optional[List[Document]] = None, - stream: Optional[bool] = False, - tool_config: Optional[ToolConfig] = None, + messages: list[UserMessage | ToolResponseMessage], + toolgroups: list[AgentToolGroup] | None = None, + documents: list[Document] | None = None, + stream: bool | None = False, + tool_config: ToolConfig | None = None, ) -> AsyncGenerator: request = AgentTurnCreateRequest( agent_id=agent_id, @@ -169,8 +180,8 @@ class MetaReferenceAgentsImpl(Agents): agent_id: str, session_id: str, turn_id: str, - tool_responses: List[ToolResponse], - stream: Optional[bool] = False, + tool_responses: list[ToolResponse], + stream: bool | None = False, ) -> AsyncGenerator: request = AgentTurnResumeRequest( agent_id=agent_id, @@ -208,9 +219,10 @@ class MetaReferenceAgentsImpl(Agents): self, agent_id: str, session_id: str, - turn_ids: Optional[List[str]] = None, + turn_ids: list[str] | None = None, ) -> Session: agent = await self._get_agent_impl(agent_id) + session_info = await agent.storage.get_session_info(session_id) if session_info is None: raise ValueError(f"Session {session_id} not found") @@ 
-225,22 +237,117 @@ class MetaReferenceAgentsImpl(Agents): ) async def delete_agents_session(self, agent_id: str, session_id: str) -> None: - await self.persistence_store.delete(f"session:{agent_id}:{session_id}") + agent = await self._get_agent_impl(agent_id) + session_info = await agent.storage.get_session_info(session_id) + if session_info is None: + raise ValueError(f"Session {session_id} not found") + + # Delete turns first, then the session + await agent.storage.delete_session_turns(session_id) + await agent.storage.delete_session(session_id) async def delete_agent(self, agent_id: str) -> None: + # First get all sessions for this agent + agent = await self._get_agent_impl(agent_id) + sessions = await agent.storage.list_sessions() + + # Delete all sessions + for session in sessions: + await self.delete_agents_session(agent_id, session.session_id) + + # Finally delete the agent itself await self.persistence_store.delete(f"agent:{agent_id}") + async def list_agents(self, start_index: int | None = None, limit: int | None = None) -> PaginatedResponse: + agent_keys = await self.persistence_store.keys_in_range("agent:", "agent:\xff") + agent_list: list[Agent] = [] + for agent_key in agent_keys: + agent_id = agent_key.split(":")[1] + + # Get the agent info using the key + agent_info_json = await self.persistence_store.get(agent_key) + if not agent_info_json: + logger.error(f"Could not find agent info for key {agent_key}") + continue + + try: + agent_info = AgentInfo.model_validate_json(agent_info_json) + agent_list.append( + Agent( + agent_id=agent_id, + agent_config=agent_info, + created_at=agent_info.created_at, + ) + ) + except Exception as e: + logger.error(f"Error parsing agent info for {agent_id}: {e}") + continue + + # Convert Agent objects to dictionaries + agent_dicts = [agent.model_dump() for agent in agent_list] + return paginate_records(agent_dicts, start_index, limit) + + async def get_agent(self, agent_id: str) -> Agent: + chat_agent = await self._get_agent_impl(agent_id) + agent = Agent( + agent_id=agent_id, + agent_config=chat_agent.agent_config, + created_at=chat_agent.created_at, + ) + return agent + + async def list_agent_sessions( + self, agent_id: str, start_index: int | None = None, limit: int | None = None + ) -> PaginatedResponse: + agent = await self._get_agent_impl(agent_id) + sessions = await agent.storage.list_sessions() + # Convert Session objects to dictionaries + session_dicts = [session.model_dump() for session in sessions] + return paginate_records(session_dicts, start_index, limit) + async def shutdown(self) -> None: pass - async def list_agents(self) -> ListAgentsResponse: - pass - - async def get_agent(self, agent_id: str) -> Agent: - pass - - async def list_agent_sessions( + # OpenAI responses + async def get_openai_response( self, - agent_id: str, - ) -> ListAgentSessionsResponse: - pass + response_id: str, + ) -> OpenAIResponseObject: + return await self.openai_responses_impl.get_openai_response(response_id) + + async def create_openai_response( + self, + input: str | list[OpenAIResponseInput], + model: str, + instructions: str | None = None, + previous_response_id: str | None = None, + store: bool | None = True, + stream: bool | None = False, + temperature: float | None = None, + tools: list[OpenAIResponseInputTool] | None = None, + ) -> OpenAIResponseObject: + return await self.openai_responses_impl.create_openai_response( + input, model, instructions, previous_response_id, store, stream, temperature, tools + ) + + async def list_openai_responses( + 
self, + after: str | None = None, + limit: int | None = 50, + model: str | None = None, + order: Order | None = Order.desc, + ) -> ListOpenAIResponseObject: + return await self.openai_responses_impl.list_openai_responses(after, limit, model, order) + + async def list_openai_response_input_items( + self, + response_id: str, + after: str | None = None, + before: str | None = None, + include: list[str] | None = None, + limit: int | None = 20, + order: Order | None = Order.desc, + ) -> ListOpenAIResponseInputItem: + return await self.openai_responses_impl.list_openai_response_input_items( + response_id, after, before, include, limit, order + ) diff --git a/llama_stack/providers/inline/agents/meta_reference/config.py b/llama_stack/providers/inline/agents/meta_reference/config.py index ff34e5d5f..1c392f29c 100644 --- a/llama_stack/providers/inline/agents/meta_reference/config.py +++ b/llama_stack/providers/inline/agents/meta_reference/config.py @@ -4,22 +4,28 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict +from typing import Any from pydantic import BaseModel from llama_stack.providers.utils.kvstore import KVStoreConfig from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig +from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig class MetaReferenceAgentsImplConfig(BaseModel): persistence_store: KVStoreConfig + responses_store: SqlStoreConfig @classmethod - def sample_run_config(cls, __distro_dir__: str) -> Dict[str, Any]: + def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { "persistence_store": SqliteKVStoreConfig.sample_run_config( __distro_dir__=__distro_dir__, db_name="agents_store.db", - ) + ), + "responses_store": SqliteSqlStoreConfig.sample_run_config( + __distro_dir__=__distro_dir__, + db_name="responses_store.db", + ), } diff --git a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py new file mode 100644 index 000000000..19d7ea56f --- /dev/null +++ b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py @@ -0,0 +1,776 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +import json +import time +import uuid +from collections.abc import AsyncIterator +from typing import Any, cast + +from openai.types.chat import ChatCompletionToolParam +from pydantic import BaseModel + +from llama_stack.apis.agents import Order +from llama_stack.apis.agents.openai_responses import ( + AllowedToolsFilter, + ListOpenAIResponseInputItem, + ListOpenAIResponseObject, + OpenAIResponseInput, + OpenAIResponseInputFunctionToolCallOutput, + OpenAIResponseInputMessageContent, + OpenAIResponseInputMessageContentImage, + OpenAIResponseInputMessageContentText, + OpenAIResponseInputTool, + OpenAIResponseInputToolMCP, + OpenAIResponseMessage, + OpenAIResponseObject, + OpenAIResponseObjectStream, + OpenAIResponseObjectStreamResponseCompleted, + OpenAIResponseObjectStreamResponseCreated, + OpenAIResponseObjectStreamResponseOutputTextDelta, + OpenAIResponseOutput, + OpenAIResponseOutputMessageContent, + OpenAIResponseOutputMessageContentOutputText, + OpenAIResponseOutputMessageFunctionToolCall, + OpenAIResponseOutputMessageMCPListTools, + OpenAIResponseOutputMessageWebSearchToolCall, +) +from llama_stack.apis.inference.inference import ( + Inference, + OpenAIAssistantMessageParam, + OpenAIChatCompletion, + OpenAIChatCompletionContentPartImageParam, + OpenAIChatCompletionContentPartParam, + OpenAIChatCompletionContentPartTextParam, + OpenAIChatCompletionToolCall, + OpenAIChatCompletionToolCallFunction, + OpenAIChoice, + OpenAIDeveloperMessageParam, + OpenAIImageURL, + OpenAIMessageParam, + OpenAISystemMessageParam, + OpenAIToolMessageParam, + OpenAIUserMessageParam, +) +from llama_stack.apis.tools.tools import ToolGroups, ToolRuntime +from llama_stack.log import get_logger +from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition +from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool +from llama_stack.providers.utils.responses.responses_store import ResponsesStore +from llama_stack.providers.utils.tools.mcp import invoke_mcp_tool, list_mcp_tools + +logger = get_logger(name=__name__, category="openai_responses") + +OPENAI_RESPONSES_PREFIX = "openai_responses:" + + +async def _convert_response_content_to_chat_content( + content: str | list[OpenAIResponseInputMessageContent] | list[OpenAIResponseOutputMessageContent], +) -> str | list[OpenAIChatCompletionContentPartParam]: + """ + Convert the content parts from an OpenAI Response API request into OpenAI Chat Completion content parts. + + The content schemas of each API look similar, but are not exactly the same. 
+ """ + if isinstance(content, str): + return content + + converted_parts = [] + for content_part in content: + if isinstance(content_part, OpenAIResponseInputMessageContentText): + converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part.text)) + elif isinstance(content_part, OpenAIResponseOutputMessageContentOutputText): + converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part.text)) + elif isinstance(content_part, OpenAIResponseInputMessageContentImage): + if content_part.image_url: + image_url = OpenAIImageURL(url=content_part.image_url, detail=content_part.detail) + converted_parts.append(OpenAIChatCompletionContentPartImageParam(image_url=image_url)) + elif isinstance(content_part, str): + converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part)) + else: + raise ValueError( + f"Llama Stack OpenAI Responses does not yet support content type '{type(content_part)}' in this context" + ) + return converted_parts + + +async def _convert_response_input_to_chat_messages( + input: str | list[OpenAIResponseInput], +) -> list[OpenAIMessageParam]: + """ + Convert the input from an OpenAI Response API request into OpenAI Chat Completion messages. + """ + messages: list[OpenAIMessageParam] = [] + if isinstance(input, list): + for input_item in input: + if isinstance(input_item, OpenAIResponseInputFunctionToolCallOutput): + messages.append( + OpenAIToolMessageParam( + content=input_item.output, + tool_call_id=input_item.call_id, + ) + ) + elif isinstance(input_item, OpenAIResponseOutputMessageFunctionToolCall): + tool_call = OpenAIChatCompletionToolCall( + index=0, + id=input_item.call_id, + function=OpenAIChatCompletionToolCallFunction( + name=input_item.name, + arguments=input_item.arguments, + ), + ) + messages.append(OpenAIAssistantMessageParam(tool_calls=[tool_call])) + else: + content = await _convert_response_content_to_chat_content(input_item.content) + message_type = await _get_message_type_by_role(input_item.role) + if message_type is None: + raise ValueError( + f"Llama Stack OpenAI Responses does not yet support message role '{input_item.role}' in this context" + ) + messages.append(message_type(content=content)) + else: + messages.append(OpenAIUserMessageParam(content=input)) + return messages + + +async def _convert_chat_choice_to_response_message(choice: OpenAIChoice) -> OpenAIResponseMessage: + """ + Convert an OpenAI Chat Completion choice into an OpenAI Response output message. 
+ """ + output_content = "" + if isinstance(choice.message.content, str): + output_content = choice.message.content + elif isinstance(choice.message.content, OpenAIChatCompletionContentPartTextParam): + output_content = choice.message.content.text + else: + raise ValueError( + f"Llama Stack OpenAI Responses does not yet support output content type: {type(choice.message.content)}" + ) + + return OpenAIResponseMessage( + id=f"msg_{uuid.uuid4()}", + content=[OpenAIResponseOutputMessageContentOutputText(text=output_content)], + status="completed", + role="assistant", + ) + + +async def _get_message_type_by_role(role: str): + role_to_type = { + "user": OpenAIUserMessageParam, + "system": OpenAISystemMessageParam, + "assistant": OpenAIAssistantMessageParam, + "developer": OpenAIDeveloperMessageParam, + } + return role_to_type.get(role) + + +class OpenAIResponsePreviousResponseWithInputItems(BaseModel): + input_items: ListOpenAIResponseInputItem + response: OpenAIResponseObject + + +class ChatCompletionContext(BaseModel): + model: str + messages: list[OpenAIMessageParam] + tools: list[ChatCompletionToolParam] | None = None + mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] + stream: bool + temperature: float | None + + +class OpenAIResponsesImpl: + def __init__( + self, + inference_api: Inference, + tool_groups_api: ToolGroups, + tool_runtime_api: ToolRuntime, + responses_store: ResponsesStore, + ): + self.inference_api = inference_api + self.tool_groups_api = tool_groups_api + self.tool_runtime_api = tool_runtime_api + self.responses_store = responses_store + + async def _prepend_previous_response( + self, input: str | list[OpenAIResponseInput], previous_response_id: str | None = None + ): + if previous_response_id: + previous_response_with_input = await self.responses_store.get_response_object(previous_response_id) + + # previous response input items + new_input_items = previous_response_with_input.input + + # previous response output items + new_input_items.extend(previous_response_with_input.output) + + # new input items from the current request + if isinstance(input, str): + new_input_items.append(OpenAIResponseMessage(content=input, role="user")) + else: + new_input_items.extend(input) + + input = new_input_items + + return input + + async def _prepend_instructions(self, messages, instructions): + if instructions: + messages.insert(0, OpenAISystemMessageParam(content=instructions)) + + async def get_openai_response( + self, + response_id: str, + ) -> OpenAIResponseObject: + response_with_input = await self.responses_store.get_response_object(response_id) + return OpenAIResponseObject(**{k: v for k, v in response_with_input.model_dump().items() if k != "input"}) + + async def list_openai_responses( + self, + after: str | None = None, + limit: int | None = 50, + model: str | None = None, + order: Order | None = Order.desc, + ) -> ListOpenAIResponseObject: + return await self.responses_store.list_responses(after, limit, model, order) + + async def list_openai_response_input_items( + self, + response_id: str, + after: str | None = None, + before: str | None = None, + include: list[str] | None = None, + limit: int | None = 20, + order: Order | None = Order.desc, + ) -> ListOpenAIResponseInputItem: + """List input items for a given OpenAI response. + + :param response_id: The ID of the response to retrieve input items for. + :param after: An item ID to list items after, used for pagination. + :param before: An item ID to list items before, used for pagination. 
+ :param include: Additional fields to include in the response. + :param limit: A limit on the number of objects to be returned. + :param order: The order to return the input items in. + :returns: An ListOpenAIResponseInputItem. + """ + return await self.responses_store.list_response_input_items(response_id, after, before, include, limit, order) + + async def _process_response_choices( + self, + chat_response: OpenAIChatCompletion, + ctx: ChatCompletionContext, + tools: list[OpenAIResponseInputTool] | None, + ) -> list[OpenAIResponseOutput]: + """Handle tool execution and response message creation.""" + output_messages: list[OpenAIResponseOutput] = [] + # Execute tool calls if any + for choice in chat_response.choices: + if choice.message.tool_calls and tools: + # Assume if the first tool is a function, all tools are functions + if tools[0].type == "function": + for tool_call in choice.message.tool_calls: + output_messages.append( + OpenAIResponseOutputMessageFunctionToolCall( + arguments=tool_call.function.arguments or "", + call_id=tool_call.id, + name=tool_call.function.name or "", + id=f"fc_{uuid.uuid4()}", + status="completed", + ) + ) + else: + tool_messages = await self._execute_tool_and_return_final_output(choice, ctx) + output_messages.extend(tool_messages) + else: + output_messages.append(await _convert_chat_choice_to_response_message(choice)) + + return output_messages + + async def _store_response( + self, + response: OpenAIResponseObject, + input: str | list[OpenAIResponseInput], + ) -> None: + new_input_id = f"msg_{uuid.uuid4()}" + if isinstance(input, str): + # synthesize a message from the input string + input_content = OpenAIResponseInputMessageContentText(text=input) + input_content_item = OpenAIResponseMessage( + role="user", + content=[input_content], + id=new_input_id, + ) + input_items_data = [input_content_item] + else: + # we already have a list of messages + input_items_data = [] + for input_item in input: + if isinstance(input_item, OpenAIResponseMessage): + # These may or may not already have an id, so dump to dict, check for id, and add if missing + input_item_dict = input_item.model_dump() + if "id" not in input_item_dict: + input_item_dict["id"] = new_input_id + input_items_data.append(OpenAIResponseMessage(**input_item_dict)) + else: + input_items_data.append(input_item) + + await self.responses_store.store_response_object( + response_object=response, + input=input_items_data, + ) + + async def create_openai_response( + self, + input: str | list[OpenAIResponseInput], + model: str, + instructions: str | None = None, + previous_response_id: str | None = None, + store: bool | None = True, + stream: bool | None = False, + temperature: float | None = None, + tools: list[OpenAIResponseInputTool] | None = None, + ): + stream = False if stream is None else stream + + output_messages: list[OpenAIResponseOutput] = [] + + # Input preprocessing + input = await self._prepend_previous_response(input, previous_response_id) + messages = await _convert_response_input_to_chat_messages(input) + await self._prepend_instructions(messages, instructions) + + # Tool setup + chat_tools, mcp_tool_to_server, mcp_list_message = ( + await self._convert_response_tools_to_chat_tools(tools) if tools else (None, {}, None) + ) + if mcp_list_message: + output_messages.append(mcp_list_message) + + ctx = ChatCompletionContext( + model=model, + messages=messages, + tools=chat_tools, + mcp_tool_to_server=mcp_tool_to_server, + stream=stream, + temperature=temperature, + ) + + inference_result = 
await self.inference_api.openai_chat_completion( + model=model, + messages=messages, + tools=chat_tools, + stream=stream, + temperature=temperature, + ) + + if stream: + return self._create_streaming_response( + inference_result=inference_result, + ctx=ctx, + output_messages=output_messages, + input=input, + model=model, + store=store, + tools=tools, + ) + else: + return await self._create_non_streaming_response( + inference_result=inference_result, + ctx=ctx, + output_messages=output_messages, + input=input, + model=model, + store=store, + tools=tools, + ) + + async def _create_non_streaming_response( + self, + inference_result: Any, + ctx: ChatCompletionContext, + output_messages: list[OpenAIResponseOutput], + input: str | list[OpenAIResponseInput], + model: str, + store: bool | None, + tools: list[OpenAIResponseInputTool] | None, + ) -> OpenAIResponseObject: + chat_response = OpenAIChatCompletion(**inference_result.model_dump()) + + # Process response choices (tool execution and message creation) + output_messages.extend( + await self._process_response_choices( + chat_response=chat_response, + ctx=ctx, + tools=tools, + ) + ) + + response = OpenAIResponseObject( + created_at=chat_response.created, + id=f"resp-{uuid.uuid4()}", + model=model, + object="response", + status="completed", + output=output_messages, + ) + logger.debug(f"OpenAI Responses response: {response}") + + # Store response if requested + if store: + await self._store_response( + response=response, + input=input, + ) + + return response + + async def _create_streaming_response( + self, + inference_result: Any, + ctx: ChatCompletionContext, + output_messages: list[OpenAIResponseOutput], + input: str | list[OpenAIResponseInput], + model: str, + store: bool | None, + tools: list[OpenAIResponseInputTool] | None, + ) -> AsyncIterator[OpenAIResponseObjectStream]: + # Create initial response and emit response.created immediately + response_id = f"resp-{uuid.uuid4()}" + created_at = int(time.time()) + + initial_response = OpenAIResponseObject( + created_at=created_at, + id=response_id, + model=model, + object="response", + status="in_progress", + output=output_messages.copy(), + ) + + # Emit response.created immediately + yield OpenAIResponseObjectStreamResponseCreated(response=initial_response) + + # For streaming, inference_result is an async iterator of chunks + # Stream chunks and emit delta events as they arrive + chat_response_id = "" + chat_response_content = [] + chat_response_tool_calls: dict[int, OpenAIChatCompletionToolCall] = {} + chunk_created = 0 + chunk_model = "" + chunk_finish_reason = "" + sequence_number = 0 + + # Create a placeholder message item for delta events + message_item_id = f"msg_{uuid.uuid4()}" + + async for chunk in inference_result: + chat_response_id = chunk.id + chunk_created = chunk.created + chunk_model = chunk.model + for chunk_choice in chunk.choices: + # Emit incremental text content as delta events + if chunk_choice.delta.content: + sequence_number += 1 + yield OpenAIResponseObjectStreamResponseOutputTextDelta( + content_index=0, + delta=chunk_choice.delta.content, + item_id=message_item_id, + output_index=0, + sequence_number=sequence_number, + ) + + # Collect content for final response + chat_response_content.append(chunk_choice.delta.content or "") + if chunk_choice.finish_reason: + chunk_finish_reason = chunk_choice.finish_reason + + # Aggregate tool call arguments across chunks, using their index as the aggregation key + if chunk_choice.delta.tool_calls: + for tool_call in 
chunk_choice.delta.tool_calls: + response_tool_call = chat_response_tool_calls.get(tool_call.index, None) + if response_tool_call: + # Don't attempt to concatenate arguments if we don't have any new arguments + if tool_call.function.arguments: + # Guard against an initial None argument before we concatenate + response_tool_call.function.arguments = ( + response_tool_call.function.arguments or "" + ) + tool_call.function.arguments + else: + tool_call_dict: dict[str, Any] = tool_call.model_dump() + tool_call_dict.pop("type", None) + response_tool_call = OpenAIChatCompletionToolCall(**tool_call_dict) + chat_response_tool_calls[tool_call.index] = response_tool_call + + # Convert collected chunks to complete response + if chat_response_tool_calls: + tool_calls = [chat_response_tool_calls[i] for i in sorted(chat_response_tool_calls.keys())] + else: + tool_calls = None + assistant_message = OpenAIAssistantMessageParam( + content="".join(chat_response_content), + tool_calls=tool_calls, + ) + chat_response_obj = OpenAIChatCompletion( + id=chat_response_id, + choices=[ + OpenAIChoice( + message=assistant_message, + finish_reason=chunk_finish_reason, + index=0, + ) + ], + created=chunk_created, + model=chunk_model, + ) + + # Process response choices (tool execution and message creation) + output_messages.extend( + await self._process_response_choices( + chat_response=chat_response_obj, + ctx=ctx, + tools=tools, + ) + ) + + # Create final response + final_response = OpenAIResponseObject( + created_at=created_at, + id=response_id, + model=model, + object="response", + status="completed", + output=output_messages, + ) + + if store: + await self._store_response( + response=final_response, + input=input, + ) + + # Emit response.completed + yield OpenAIResponseObjectStreamResponseCompleted(response=final_response) + + async def _convert_response_tools_to_chat_tools( + self, tools: list[OpenAIResponseInputTool] + ) -> tuple[ + list[ChatCompletionToolParam], + dict[str, OpenAIResponseInputToolMCP], + OpenAIResponseOutput | None, + ]: + from llama_stack.apis.agents.openai_responses import ( + MCPListToolsTool, + ) + from llama_stack.apis.tools.tools import Tool + + mcp_tool_to_server = {} + + def make_openai_tool(tool_name: str, tool: Tool) -> ChatCompletionToolParam: + tool_def = ToolDefinition( + tool_name=tool_name, + description=tool.description, + parameters={ + param.name: ToolParamDefinition( + param_type=param.parameter_type, + description=param.description, + required=param.required, + default=param.default, + ) + for param in tool.parameters + }, + ) + return convert_tooldef_to_openai_tool(tool_def) + + mcp_list_message = None + chat_tools: list[ChatCompletionToolParam] = [] + for input_tool in tools: + # TODO: Handle other tool types + if input_tool.type == "function": + chat_tools.append(ChatCompletionToolParam(type="function", function=input_tool.model_dump())) + elif input_tool.type == "web_search": + tool_name = "web_search" + tool = await self.tool_groups_api.get_tool(tool_name) + if not tool: + raise ValueError(f"Tool {tool_name} not found") + chat_tools.append(make_openai_tool(tool_name, tool)) + elif input_tool.type == "mcp": + always_allowed = None + never_allowed = None + if input_tool.allowed_tools: + if isinstance(input_tool.allowed_tools, list): + always_allowed = input_tool.allowed_tools + elif isinstance(input_tool.allowed_tools, AllowedToolsFilter): + always_allowed = input_tool.allowed_tools.always + never_allowed = input_tool.allowed_tools.never + + tool_defs = await 
list_mcp_tools( + endpoint=input_tool.server_url, + headers=input_tool.headers or {}, + ) + + mcp_list_message = OpenAIResponseOutputMessageMCPListTools( + id=f"mcp_list_{uuid.uuid4()}", + status="completed", + server_label=input_tool.server_label, + tools=[], + ) + for t in tool_defs.data: + if never_allowed and t.name in never_allowed: + continue + if not always_allowed or t.name in always_allowed: + chat_tools.append(make_openai_tool(t.name, t)) + if t.name in mcp_tool_to_server: + raise ValueError(f"Duplicate tool name {t.name} found for server {input_tool.server_label}") + mcp_tool_to_server[t.name] = input_tool + mcp_list_message.tools.append( + MCPListToolsTool( + name=t.name, + description=t.description, + input_schema={ + "type": "object", + "properties": { + p.name: { + "type": p.parameter_type, + "description": p.description, + } + for p in t.parameters + }, + "required": [p.name for p in t.parameters if p.required], + }, + ) + ) + else: + raise ValueError(f"Llama Stack OpenAI Responses does not yet support tool type: {input_tool.type}") + return chat_tools, mcp_tool_to_server, mcp_list_message + + async def _execute_tool_and_return_final_output( + self, + choice: OpenAIChoice, + ctx: ChatCompletionContext, + ) -> list[OpenAIResponseOutput]: + output_messages: list[OpenAIResponseOutput] = [] + + if not isinstance(choice.message, OpenAIAssistantMessageParam): + return output_messages + + if not choice.message.tool_calls: + return output_messages + + next_turn_messages = ctx.messages.copy() + + # Add the assistant message with tool_calls response to the messages list + next_turn_messages.append(choice.message) + + for tool_call in choice.message.tool_calls: + # TODO: telemetry spans for tool calls + tool_call_log, further_input = await self._execute_tool_call(tool_call, ctx) + if tool_call_log: + output_messages.append(tool_call_log) + if further_input: + next_turn_messages.append(further_input) + + tool_results_chat_response = await self.inference_api.openai_chat_completion( + model=ctx.model, + messages=next_turn_messages, + stream=ctx.stream, + temperature=ctx.temperature, + ) + # type cast to appease mypy: this is needed because we don't handle streaming properly :) + tool_results_chat_response = cast(OpenAIChatCompletion, tool_results_chat_response) + + # Huge TODO: these are NOT the final outputs, we must keep the loop going + tool_final_outputs = [ + await _convert_chat_choice_to_response_message(choice) for choice in tool_results_chat_response.choices + ] + # TODO: Wire in annotations with URLs, titles, etc to these output messages + output_messages.extend(tool_final_outputs) + return output_messages + + async def _execute_tool_call( + self, + tool_call: OpenAIChatCompletionToolCall, + ctx: ChatCompletionContext, + ) -> tuple[OpenAIResponseOutput | None, OpenAIMessageParam | None]: + from llama_stack.providers.utils.inference.prompt_adapter import ( + interleaved_content_as_str, + ) + + tool_call_id = tool_call.id + function = tool_call.function + + if not function or not tool_call_id or not function.name: + return None, None + + error_exc = None + result = None + try: + if function.name in ctx.mcp_tool_to_server: + mcp_tool = ctx.mcp_tool_to_server[function.name] + result = await invoke_mcp_tool( + endpoint=mcp_tool.server_url, + headers=mcp_tool.headers or {}, + tool_name=function.name, + kwargs=json.loads(function.arguments) if function.arguments else {}, + ) + else: + result = await self.tool_runtime_api.invoke_tool( + tool_name=function.name, + 
kwargs=json.loads(function.arguments) if function.arguments else {}, + ) + except Exception as e: + error_exc = e + + if function.name in ctx.mcp_tool_to_server: + from llama_stack.apis.agents.openai_responses import OpenAIResponseOutputMessageMCPCall + + message = OpenAIResponseOutputMessageMCPCall( + id=tool_call_id, + arguments=function.arguments, + name=function.name, + server_label=ctx.mcp_tool_to_server[function.name].server_label, + ) + if error_exc: + message.error = str(error_exc) + elif (result.error_code and result.error_code > 0) or result.error_message: + message.error = f"Error (code {result.error_code}): {result.error_message}" + elif result.content: + message.output = interleaved_content_as_str(result.content) + else: + if function.name == "web_search": + message = OpenAIResponseOutputMessageWebSearchToolCall( + id=tool_call_id, + status="completed", + ) + if error_exc or (result.error_code and result.error_code > 0) or result.error_message: + message.status = "failed" + else: + raise ValueError(f"Unknown tool {function.name} called") + + input_message = None + if result and result.content: + if isinstance(result.content, str): + content = result.content + elif isinstance(result.content, list): + from llama_stack.apis.common.content_types import ImageContentItem, TextContentItem + + content = [] + for item in result.content: + if isinstance(item, TextContentItem): + part = OpenAIChatCompletionContentPartTextParam(text=item.text) + elif isinstance(item, ImageContentItem): + if item.image.data: + url = f"data:image;base64,{item.image.data}" + else: + url = item.image.url + part = OpenAIChatCompletionContentPartImageParam(image_url=OpenAIImageURL(url=url)) + else: + raise ValueError(f"Unknown result content type: {type(item)}") + content.append(part) + else: + raise ValueError(f"Unknown result content type: {type(result.content)}") + input_message = OpenAIToolMessageParam(content=content, tool_call_id=tool_call_id) + + return message, input_message diff --git a/llama_stack/providers/inline/agents/meta_reference/persistence.py b/llama_stack/providers/inline/agents/meta_reference/persistence.py index 202d43609..5031a4a90 100644 --- a/llama_stack/providers/inline/agents/meta_reference/persistence.py +++ b/llama_stack/providers/inline/agents/meta_reference/persistence.py @@ -8,11 +8,8 @@ import json import logging import uuid from datetime import datetime, timezone -from typing import List, Optional -from pydantic import BaseModel - -from llama_stack.apis.agents import ToolExecutionStep, Turn +from llama_stack.apis.agents import AgentConfig, Session, ToolExecutionStep, Turn from llama_stack.distribution.access_control import check_access from llama_stack.distribution.datatypes import AccessAttributes from llama_stack.distribution.request_headers import get_auth_attributes @@ -21,13 +18,15 @@ from llama_stack.providers.utils.kvstore import KVStore log = logging.getLogger(__name__) -class AgentSessionInfo(BaseModel): - session_id: str - session_name: str +class AgentSessionInfo(Session): # TODO: is this used anywhere? 
- vector_db_id: Optional[str] = None + vector_db_id: str | None = None started_at: datetime - access_attributes: Optional[AccessAttributes] = None + access_attributes: AccessAttributes | None = None + + +class AgentInfo(AgentConfig): + created_at: datetime class AgentPersistence: @@ -47,6 +46,7 @@ class AgentPersistence: session_name=name, started_at=datetime.now(timezone.utc), access_attributes=access_attributes, + turns=[], ) await self.kvstore.set( @@ -55,7 +55,7 @@ class AgentPersistence: ) return session_id - async def get_session_info(self, session_id: str) -> Optional[AgentSessionInfo]: + async def get_session_info(self, session_id: str) -> AgentSessionInfo | None: value = await self.kvstore.get( key=f"session:{self.agent_id}:{session_id}", ) @@ -78,7 +78,7 @@ class AgentPersistence: return check_access(session_info.session_id, session_info.access_attributes, get_auth_attributes()) - async def get_session_if_accessible(self, session_id: str) -> Optional[AgentSessionInfo]: + async def get_session_if_accessible(self, session_id: str) -> AgentSessionInfo | None: """Get session info if the user has access to it. For internal use by sub-session methods.""" session_info = await self.get_session_info(session_id) if not session_info: @@ -106,11 +106,11 @@ class AgentPersistence: value=turn.model_dump_json(), ) - async def get_session_turns(self, session_id: str) -> List[Turn]: + async def get_session_turns(self, session_id: str) -> list[Turn]: if not await self.get_session_if_accessible(session_id): raise ValueError(f"Session {session_id} not found or access denied") - values = await self.kvstore.range( + values = await self.kvstore.values_in_range( start_key=f"session:{self.agent_id}:{session_id}:", end_key=f"session:{self.agent_id}:{session_id}:\xff\xff\xff\xff", ) @@ -122,10 +122,9 @@ class AgentPersistence: except Exception as e: log.error(f"Error parsing turn: {e}") continue - turns.sort(key=lambda x: (x.completed_at or datetime.min)) return turns - async def get_session_turn(self, session_id: str, turn_id: str) -> Optional[Turn]: + async def get_session_turn(self, session_id: str, turn_id: str) -> Turn | None: if not await self.get_session_if_accessible(session_id): raise ValueError(f"Session {session_id} not found or access denied") @@ -145,7 +144,7 @@ class AgentPersistence: value=step.model_dump_json(), ) - async def get_in_progress_tool_call_step(self, session_id: str, turn_id: str) -> Optional[ToolExecutionStep]: + async def get_in_progress_tool_call_step(self, session_id: str, turn_id: str) -> ToolExecutionStep | None: if not await self.get_session_if_accessible(session_id): return None @@ -163,7 +162,7 @@ class AgentPersistence: value=str(num_infer_iters), ) - async def get_num_infer_iters_in_turn(self, session_id: str, turn_id: str) -> Optional[int]: + async def get_num_infer_iters_in_turn(self, session_id: str, turn_id: str) -> int | None: if not await self.get_session_if_accessible(session_id): return None @@ -171,3 +170,43 @@ class AgentPersistence: key=f"num_infer_iters_in_turn:{self.agent_id}:{session_id}:{turn_id}", ) return int(value) if value else None + + async def list_sessions(self) -> list[Session]: + values = await self.kvstore.values_in_range( + start_key=f"session:{self.agent_id}:", + end_key=f"session:{self.agent_id}:\xff\xff\xff\xff", + ) + sessions = [] + for value in values: + try: + session_info = Session(**json.loads(value)) + sessions.append(session_info) + except Exception as e: + log.error(f"Error parsing session info: {e}") + continue + return sessions 
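The AgentPersistence helpers above (and the delete helpers that follow) all rely on a hierarchical key scheme in the kvstore, which is what makes the values_in_range scans work. A sketch of that layout as implied by the code; the IDs are made up:

# Key layout implied by AgentPersistence (IDs below are made up for illustration).
agent_id, session_id, turn_id = "agent-123", "sess-456", "turn-789"

session_key = f"session:{agent_id}:{session_id}"          # AgentSessionInfo JSON
turn_key = f"session:{agent_id}:{session_id}:{turn_id}"   # one Turn JSON per turn

# get_session_turns scans every key strictly under one session:
turns_start = f"session:{agent_id}:{session_id}:"
turns_end = f"session:{agent_id}:{session_id}:\xff\xff\xff\xff"

# list_sessions widens the scan to everything under the agent prefix and keeps
# whatever parses as a Session, logging and skipping anything that does not:
sessions_start = f"session:{agent_id}:"
sessions_end = f"session:{agent_id}:\xff\xff\xff\xff"
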
+ + async def delete_session_turns(self, session_id: str) -> None: + """Delete all turns and their associated data for a session. + + Args: + session_id: The ID of the session whose turns should be deleted. + """ + turns = await self.get_session_turns(session_id) + for turn in turns: + await self.kvstore.delete(key=f"session:{self.agent_id}:{session_id}:{turn.turn_id}") + + async def delete_session(self, session_id: str) -> None: + """Delete a session and all its associated turns. + + Args: + session_id: The ID of the session to delete. + + Raises: + ValueError: If the session does not exist. + """ + session_info = await self.get_session_info(session_id) + if session_info is None: + raise ValueError(f"Session {session_id} not found") + + await self.kvstore.delete(key=f"session:{self.agent_id}:{session_id}") diff --git a/llama_stack/providers/inline/agents/meta_reference/safety.py b/llama_stack/providers/inline/agents/meta_reference/safety.py index bef16eaba..6b3573d8c 100644 --- a/llama_stack/providers/inline/agents/meta_reference/safety.py +++ b/llama_stack/providers/inline/agents/meta_reference/safety.py @@ -6,7 +6,6 @@ import asyncio import logging -from typing import List from llama_stack.apis.inference import Message from llama_stack.apis.safety import Safety, SafetyViolation, ViolationLevel @@ -25,14 +24,14 @@ class ShieldRunnerMixin: def __init__( self, safety_api: Safety, - input_shields: List[str] = None, - output_shields: List[str] = None, + input_shields: list[str] = None, + output_shields: list[str] = None, ): self.safety_api = safety_api self.input_shields = input_shields self.output_shields = output_shields - async def run_multiple_shields(self, messages: List[Message], identifiers: List[str]) -> None: + async def run_multiple_shields(self, messages: list[Message], identifiers: list[str]) -> None: async def run_shield_with_span(identifier: str): async with tracing.span(f"run_shield_{identifier}"): return await self.safety_api.run_shield( diff --git a/llama_stack/providers/inline/datasetio/localfs/__init__.py b/llama_stack/providers/inline/datasetio/localfs/__init__.py index 5a0876d79..58aa6ffaf 100644 --- a/llama_stack/providers/inline/datasetio/localfs/__init__.py +++ b/llama_stack/providers/inline/datasetio/localfs/__init__.py @@ -4,14 +4,14 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict +from typing import Any from .config import LocalFSDatasetIOConfig async def get_provider_impl( config: LocalFSDatasetIOConfig, - _deps: Dict[str, Any], + _deps: dict[str, Any], ): from .datasetio import LocalFSDatasetIOImpl diff --git a/llama_stack/providers/inline/datasetio/localfs/config.py b/llama_stack/providers/inline/datasetio/localfs/config.py index d74521f1f..b450e8777 100644 --- a/llama_stack/providers/inline/datasetio/localfs/config.py +++ b/llama_stack/providers/inline/datasetio/localfs/config.py @@ -3,7 +3,7 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from typing import Any, Dict +from typing import Any from pydantic import BaseModel @@ -17,7 +17,7 @@ class LocalFSDatasetIOConfig(BaseModel): kvstore: KVStoreConfig @classmethod - def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> Dict[str, Any]: + def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: return { "kvstore": SqliteKVStoreConfig.sample_run_config( __distro_dir__=__distro_dir__, diff --git a/llama_stack/providers/inline/datasetio/localfs/datasetio.py b/llama_stack/providers/inline/datasetio/localfs/datasetio.py index e71107d61..da71ecb17 100644 --- a/llama_stack/providers/inline/datasetio/localfs/datasetio.py +++ b/llama_stack/providers/inline/datasetio/localfs/datasetio.py @@ -3,7 +3,7 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict, List, Optional +from typing import Any import pandas @@ -11,9 +11,9 @@ from llama_stack.apis.common.responses import PaginatedResponse from llama_stack.apis.datasetio import DatasetIO from llama_stack.apis.datasets import Dataset from llama_stack.providers.datatypes import DatasetsProtocolPrivate -from llama_stack.providers.utils.datasetio.pagination import paginate_records from llama_stack.providers.utils.datasetio.url_utils import get_dataframe_from_uri from llama_stack.providers.utils.kvstore import kvstore_impl +from llama_stack.providers.utils.pagination import paginate_records from .config import LocalFSDatasetIOConfig @@ -64,7 +64,7 @@ class LocalFSDatasetIOImpl(DatasetIO, DatasetsProtocolPrivate): # Load existing datasets from kvstore start_key = DATASETS_PREFIX end_key = f"{DATASETS_PREFIX}\xff" - stored_datasets = await self.kvstore.range(start_key, end_key) + stored_datasets = await self.kvstore.values_in_range(start_key, end_key) for dataset in stored_datasets: dataset = Dataset.model_validate_json(dataset) @@ -92,8 +92,8 @@ class LocalFSDatasetIOImpl(DatasetIO, DatasetsProtocolPrivate): async def iterrows( self, dataset_id: str, - start_index: Optional[int] = None, - limit: Optional[int] = None, + start_index: int | None = None, + limit: int | None = None, ) -> PaginatedResponse: dataset_def = self.dataset_infos[dataset_id] dataset_impl = PandasDataframeDataset(dataset_def) @@ -102,7 +102,7 @@ class LocalFSDatasetIOImpl(DatasetIO, DatasetsProtocolPrivate): records = dataset_impl.df.to_dict("records") return paginate_records(records, start_index, limit) - async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None: + async def append_rows(self, dataset_id: str, rows: list[dict[str, Any]]) -> None: dataset_def = self.dataset_infos[dataset_id] dataset_impl = PandasDataframeDataset(dataset_def) await dataset_impl.load() diff --git a/llama_stack/providers/inline/eval/meta_reference/__init__.py b/llama_stack/providers/inline/eval/meta_reference/__init__.py index e2a7fc2cd..7afe7f33b 100644 --- a/llama_stack/providers/inline/eval/meta_reference/__init__.py +++ b/llama_stack/providers/inline/eval/meta_reference/__init__.py @@ -3,7 +3,7 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from typing import Any, Dict +from typing import Any from llama_stack.distribution.datatypes import Api @@ -12,7 +12,7 @@ from .config import MetaReferenceEvalConfig async def get_provider_impl( config: MetaReferenceEvalConfig, - deps: Dict[Api, Any], + deps: dict[Api, Any], ): from .eval import MetaReferenceEvalImpl diff --git a/llama_stack/providers/inline/eval/meta_reference/config.py b/llama_stack/providers/inline/eval/meta_reference/config.py index 5b2bec259..2a4a29998 100644 --- a/llama_stack/providers/inline/eval/meta_reference/config.py +++ b/llama_stack/providers/inline/eval/meta_reference/config.py @@ -3,7 +3,7 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict +from typing import Any from pydantic import BaseModel @@ -17,7 +17,7 @@ class MetaReferenceEvalConfig(BaseModel): kvstore: KVStoreConfig @classmethod - def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> Dict[str, Any]: + def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: return { "kvstore": SqliteKVStoreConfig.sample_run_config( __distro_dir__=__distro_dir__, diff --git a/llama_stack/providers/inline/eval/meta_reference/eval.py b/llama_stack/providers/inline/eval/meta_reference/eval.py index 7c28f1bb7..bc0898dc5 100644 --- a/llama_stack/providers/inline/eval/meta_reference/eval.py +++ b/llama_stack/providers/inline/eval/meta_reference/eval.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. import json -from typing import Any, Dict, List +from typing import Any from tqdm import tqdm @@ -58,7 +58,7 @@ class MetaReferenceEvalImpl( # Load existing benchmarks from kvstore start_key = EVAL_TASKS_PREFIX end_key = f"{EVAL_TASKS_PREFIX}\xff" - stored_benchmarks = await self.kvstore.range(start_key, end_key) + stored_benchmarks = await self.kvstore.values_in_range(start_key, end_key) for benchmark in stored_benchmarks: benchmark = Benchmark.model_validate_json(benchmark) @@ -105,8 +105,8 @@ class MetaReferenceEvalImpl( return Job(job_id=job_id, status=JobStatus.completed) async def _run_agent_generation( - self, input_rows: List[Dict[str, Any]], benchmark_config: BenchmarkConfig - ) -> List[Dict[str, Any]]: + self, input_rows: list[dict[str, Any]], benchmark_config: BenchmarkConfig + ) -> list[dict[str, Any]]: candidate = benchmark_config.eval_candidate create_response = await self.agents_api.create_agent(candidate.config) agent_id = create_response.agent_id @@ -148,8 +148,8 @@ class MetaReferenceEvalImpl( return generations async def _run_model_generation( - self, input_rows: List[Dict[str, Any]], benchmark_config: BenchmarkConfig - ) -> List[Dict[str, Any]]: + self, input_rows: list[dict[str, Any]], benchmark_config: BenchmarkConfig + ) -> list[dict[str, Any]]: candidate = benchmark_config.eval_candidate assert candidate.sampling_params.max_tokens is not None, "SamplingParams.max_tokens must be provided" @@ -185,8 +185,8 @@ class MetaReferenceEvalImpl( async def evaluate_rows( self, benchmark_id: str, - input_rows: List[Dict[str, Any]], - scoring_functions: List[str], + input_rows: list[dict[str, Any]], + scoring_functions: list[str], benchmark_config: BenchmarkConfig, ) -> EvaluateResponse: candidate = benchmark_config.eval_candidate diff --git a/llama_stack/providers/inline/inference/meta_reference/__init__.py b/llama_stack/providers/inline/inference/meta_reference/__init__.py index 
3710766e2..5eb822429 100644 --- a/llama_stack/providers/inline/inference/meta_reference/__init__.py +++ b/llama_stack/providers/inline/inference/meta_reference/__init__.py @@ -4,14 +4,14 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict +from typing import Any from .config import MetaReferenceInferenceConfig async def get_provider_impl( config: MetaReferenceInferenceConfig, - _deps: Dict[str, Any], + _deps: dict[str, Any], ): from .inference import MetaReferenceInferenceImpl diff --git a/llama_stack/providers/inline/inference/meta_reference/config.py b/llama_stack/providers/inline/inference/meta_reference/config.py index 6f796d0d4..7bc961443 100644 --- a/llama_stack/providers/inline/inference/meta_reference/config.py +++ b/llama_stack/providers/inline/inference/meta_reference/config.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict, Optional +from typing import Any from pydantic import BaseModel, field_validator @@ -17,11 +17,11 @@ class MetaReferenceInferenceConfig(BaseModel): # the actual inference model id is dtermined by the moddel id in the request # Note: you need to register the model before using it for inference # models in the resouce list in the run.yaml config will be registered automatically - model: Optional[str] = None - torch_seed: Optional[int] = None + model: str | None = None + torch_seed: int | None = None max_seq_len: int = 4096 max_batch_size: int = 1 - model_parallel_size: Optional[int] = None + model_parallel_size: int | None = None # when this is False, we assume that the distributed process group is setup by someone # outside of this code (e.g., when run inside `torchrun`). that is useful for clients @@ -30,9 +30,9 @@ class MetaReferenceInferenceConfig(BaseModel): # By default, the implementation will look at ~/.llama/checkpoints/ but you # can override by specifying the directory explicitly - checkpoint_dir: Optional[str] = None + checkpoint_dir: str | None = None - quantization: Optional[QuantizationConfig] = None + quantization: QuantizationConfig | None = None @field_validator("model") @classmethod @@ -55,7 +55,7 @@ class MetaReferenceInferenceConfig(BaseModel): max_batch_size: str = "${env.MAX_BATCH_SIZE:1}", max_seq_len: str = "${env.MAX_SEQ_LEN:4096}", **kwargs, - ) -> Dict[str, Any]: + ) -> dict[str, Any]: return { "model": model, "checkpoint_dir": checkpoint_dir, diff --git a/llama_stack/providers/inline/inference/meta_reference/generators.py b/llama_stack/providers/inline/inference/meta_reference/generators.py index 0a928ce73..cb926f529 100644 --- a/llama_stack/providers/inline/inference/meta_reference/generators.py +++ b/llama_stack/providers/inline/inference/meta_reference/generators.py @@ -5,7 +5,8 @@ # the root directory of this source tree. 
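The sample_run_config above for MetaReferenceInferenceConfig seeds fields with ${env.VAR:default} placeholders such as ${env.MAX_BATCH_SIZE:1}, which are presumably substituted from the environment when the run config is loaded. A toy resolver with equivalent semantics, purely for illustration; it is not the stack's own substitution code:

import os
import re

def resolve_env_placeholders(value: str) -> str:
    """Expand "${env.NAME:default}" using the environment, falling back to the default."""
    return re.sub(
        r"\$\{env\.([A-Za-z_][A-Za-z0-9_]*):([^}]*)\}",
        lambda m: os.environ.get(m.group(1), m.group(2)),
        value,
    )

print(resolve_env_placeholders("${env.MAX_BATCH_SIZE:1}"))  # "1" unless MAX_BATCH_SIZE is set
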
import math -from typing import Generator, List, Optional, Tuple +from collections.abc import Generator +from typing import Optional import torch from lmformatenforcer import JsonSchemaParser, TokenEnforcer, TokenEnforcerTokenizerData @@ -39,7 +40,7 @@ Tokenizer = Llama4Tokenizer | Llama3Tokenizer class LogitsProcessor: def __init__(self, token_enforcer: TokenEnforcer): self.token_enforcer = token_enforcer - self.mask: Optional[torch.Tensor] = None + self.mask: torch.Tensor | None = None def __call__(self, tokens: torch.Tensor, scores: torch.Tensor) -> torch.Tensor: token_sequence = tokens[0, :].tolist() @@ -58,7 +59,7 @@ class LogitsProcessor: def get_logits_processor( tokenizer: Tokenizer, vocab_size: int, - response_format: Optional[ResponseFormat], + response_format: ResponseFormat | None, ) -> Optional["LogitsProcessor"]: if response_format is None: return None @@ -76,7 +77,7 @@ def get_logits_processor( return LogitsProcessor(token_enforcer) -def _build_regular_tokens_list(tokenizer: Tokenizer, vocab_size: int) -> List[Tuple[int, str, bool]]: +def _build_regular_tokens_list(tokenizer: Tokenizer, vocab_size: int) -> list[tuple[int, str, bool]]: token_0 = tokenizer.encode("0", bos=False, eos=False)[-1] regular_tokens = [] @@ -158,7 +159,7 @@ class LlamaGenerator: def completion( self, - request_batch: List[CompletionRequestWithRawContent], + request_batch: list[CompletionRequestWithRawContent], ) -> Generator: first_request = request_batch[0] sampling_params = first_request.sampling_params or SamplingParams() @@ -167,7 +168,7 @@ class LlamaGenerator: max_gen_len = self.args.max_seq_len - 1 temperature, top_p = _infer_sampling_params(sampling_params) - for result in self.inner_generator.generate( + yield from self.inner_generator.generate( llm_inputs=[self.formatter.encode_content(request.content) for request in request_batch], max_gen_len=max_gen_len, temperature=temperature, @@ -179,12 +180,11 @@ class LlamaGenerator: self.args.vocab_size, first_request.response_format, ), - ): - yield result + ) def chat_completion( self, - request_batch: List[ChatCompletionRequestWithRawContent], + request_batch: list[ChatCompletionRequestWithRawContent], ) -> Generator: first_request = request_batch[0] sampling_params = first_request.sampling_params or SamplingParams() @@ -193,7 +193,7 @@ class LlamaGenerator: max_gen_len = self.args.max_seq_len - 1 temperature, top_p = _infer_sampling_params(sampling_params) - for result in self.inner_generator.generate( + yield from self.inner_generator.generate( llm_inputs=[ self.formatter.encode_dialog_prompt(request.messages, _infer_tool_prompt_format(request)) for request in request_batch @@ -208,5 +208,4 @@ class LlamaGenerator: self.args.vocab_size, first_request.response_format, ), - ): - yield result + ) diff --git a/llama_stack/providers/inline/inference/meta_reference/inference.py b/llama_stack/providers/inline/inference/meta_reference/inference.py index 0b56ba1f7..e238e1b78 100644 --- a/llama_stack/providers/inline/inference/meta_reference/inference.py +++ b/llama_stack/providers/inline/inference/meta_reference/inference.py @@ -6,7 +6,8 @@ import asyncio import os -from typing import AsyncGenerator, List, Optional, Union +import sys +from collections.abc import AsyncGenerator from pydantic import BaseModel from termcolor import cprint @@ -28,7 +29,7 @@ from llama_stack.apis.inference import ( CompletionRequest, CompletionResponse, CompletionResponseStreamChunk, - Inference, + InferenceProvider, InterleavedContent, LogProbConfig, Message, @@ -59,8 +60,8 
@@ from llama_stack.providers.utils.inference.model_registry import ( build_hf_repo_model_entry, ) from llama_stack.providers.utils.inference.openai_compat import ( - OpenAIChatCompletionUnsupportedMixin, - OpenAICompletionUnsupportedMixin, + OpenAIChatCompletionToLlamaStackMixin, + OpenAICompletionToLlamaStackMixin, ) from llama_stack.providers.utils.inference.prompt_adapter import ( augment_content_with_response_format_prompt, @@ -83,10 +84,10 @@ def llama_builder_fn(config: MetaReferenceInferenceConfig, model_id: str, llama_ class MetaReferenceInferenceImpl( - OpenAICompletionUnsupportedMixin, - OpenAIChatCompletionUnsupportedMixin, + OpenAICompletionToLlamaStackMixin, + OpenAIChatCompletionToLlamaStackMixin, SentenceTransformerEmbeddingMixin, - Inference, + InferenceProvider, ModelsProtocolPrivate, ): def __init__(self, config: MetaReferenceInferenceConfig) -> None: @@ -184,11 +185,11 @@ class MetaReferenceInferenceImpl( self, model_id: str, content: InterleavedContent, - sampling_params: Optional[SamplingParams] = None, - response_format: Optional[ResponseFormat] = None, - stream: Optional[bool] = False, - logprobs: Optional[LogProbConfig] = None, - ) -> Union[CompletionResponse, CompletionResponseStreamChunk]: + sampling_params: SamplingParams | None = None, + response_format: ResponseFormat | None = None, + stream: bool | None = False, + logprobs: LogProbConfig | None = None, + ) -> CompletionResponse | CompletionResponseStreamChunk: if sampling_params is None: sampling_params = SamplingParams() if logprobs: @@ -215,11 +216,11 @@ class MetaReferenceInferenceImpl( async def batch_completion( self, model_id: str, - content_batch: List[InterleavedContent], - sampling_params: Optional[SamplingParams] = None, - response_format: Optional[ResponseFormat] = None, - stream: Optional[bool] = False, - logprobs: Optional[LogProbConfig] = None, + content_batch: list[InterleavedContent], + sampling_params: SamplingParams | None = None, + response_format: ResponseFormat | None = None, + stream: bool | None = False, + logprobs: LogProbConfig | None = None, ) -> BatchCompletionResponse: if sampling_params is None: sampling_params = SamplingParams() @@ -253,7 +254,8 @@ class MetaReferenceInferenceImpl( def impl(): stop_reason = None - for token_result in self.generator.completion(request): + for token_results in self.generator.completion([request]): + token_result = token_results[0] if token_result.token == tokenizer.eot_id: stop_reason = StopReason.end_of_turn text = "" @@ -290,14 +292,14 @@ class MetaReferenceInferenceImpl( for x in impl(): yield x - async def _nonstream_completion(self, request_batch: List[CompletionRequest]) -> List[CompletionResponse]: + async def _nonstream_completion(self, request_batch: list[CompletionRequest]) -> list[CompletionResponse]: tokenizer = self.generator.formatter.tokenizer first_request = request_batch[0] class ItemState(BaseModel): - tokens: List[int] = [] - logprobs: List[TokenLogProbs] = [] + tokens: list[int] = [] + logprobs: list[TokenLogProbs] = [] stop_reason: StopReason | None = None finished: bool = False @@ -348,15 +350,15 @@ class MetaReferenceInferenceImpl( async def chat_completion( self, model_id: str, - messages: List[Message], - sampling_params: Optional[SamplingParams] = None, - response_format: Optional[ResponseFormat] = None, - tools: Optional[List[ToolDefinition]] = None, - tool_choice: Optional[ToolChoice] = ToolChoice.auto, - tool_prompt_format: Optional[ToolPromptFormat] = None, - stream: Optional[bool] = False, - logprobs: 
Optional[LogProbConfig] = None, - tool_config: Optional[ToolConfig] = None, + messages: list[Message], + sampling_params: SamplingParams | None = None, + response_format: ResponseFormat | None = None, + tools: list[ToolDefinition] | None = None, + tool_choice: ToolChoice | None = ToolChoice.auto, + tool_prompt_format: ToolPromptFormat | None = None, + stream: bool | None = False, + logprobs: LogProbConfig | None = None, + tool_config: ToolConfig | None = None, ) -> AsyncGenerator: if sampling_params is None: sampling_params = SamplingParams() @@ -394,13 +396,13 @@ class MetaReferenceInferenceImpl( async def batch_chat_completion( self, model_id: str, - messages_batch: List[List[Message]], - sampling_params: Optional[SamplingParams] = None, - response_format: Optional[ResponseFormat] = None, - tools: Optional[List[ToolDefinition]] = None, - stream: Optional[bool] = False, - logprobs: Optional[LogProbConfig] = None, - tool_config: Optional[ToolConfig] = None, + messages_batch: list[list[Message]], + sampling_params: SamplingParams | None = None, + response_format: ResponseFormat | None = None, + tools: list[ToolDefinition] | None = None, + stream: bool | None = False, + logprobs: LogProbConfig | None = None, + tool_config: ToolConfig | None = None, ) -> BatchChatCompletionResponse: if sampling_params is None: sampling_params = SamplingParams() @@ -435,15 +437,15 @@ class MetaReferenceInferenceImpl( return BatchChatCompletionResponse(batch=results) async def _nonstream_chat_completion( - self, request_batch: List[ChatCompletionRequest] - ) -> List[ChatCompletionResponse]: + self, request_batch: list[ChatCompletionRequest] + ) -> list[ChatCompletionResponse]: tokenizer = self.generator.formatter.tokenizer first_request = request_batch[0] class ItemState(BaseModel): - tokens: List[int] = [] - logprobs: List[TokenLogProbs] = [] + tokens: list[int] = [] + logprobs: list[TokenLogProbs] = [] stop_reason: StopReason | None = None finished: bool = False @@ -454,9 +456,9 @@ class MetaReferenceInferenceImpl( first = token_results[0] if not first.finished and not first.ignore_token: if os.environ.get("LLAMA_MODELS_DEBUG", "0") in ("1", "2"): - cprint(first.text, "cyan", end="") + cprint(first.text, color="cyan", end="", file=sys.stderr) if os.environ.get("LLAMA_MODELS_DEBUG", "0") == "2": - cprint(f"<{first.token}>", "magenta", end="") + cprint(f"<{first.token}>", color="magenta", end="", file=sys.stderr) for result in token_results: idx = result.batch_idx @@ -515,11 +517,12 @@ class MetaReferenceInferenceImpl( stop_reason = None ipython = False - for token_result in self.generator.chat_completion(request): + for token_results in self.generator.chat_completion([request]): + token_result = token_results[0] if os.environ.get("LLAMA_MODELS_DEBUG", "0") == "1": - cprint(token_result.text, "cyan", end="") + cprint(token_result.text, color="cyan", end="", file=sys.stderr) if os.environ.get("LLAMA_MODELS_DEBUG", "0") == "2": - cprint(f"<{token_result.token}>", "magenta", end="") + cprint(f"<{token_result.token}>", color="magenta", end="", file=sys.stderr) if token_result.token == tokenizer.eot_id: stop_reason = StopReason.end_of_turn diff --git a/llama_stack/providers/inline/inference/meta_reference/model_parallel.py b/llama_stack/providers/inline/inference/meta_reference/model_parallel.py index 50640c6d1..9031d36b3 100644 --- a/llama_stack/providers/inline/inference/meta_reference/model_parallel.py +++ b/llama_stack/providers/inline/inference/meta_reference/model_parallel.py @@ -4,9 +4,10 @@ # This source 
code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +from collections.abc import Callable, Generator from copy import deepcopy from functools import partial -from typing import Any, Callable, Generator, List +from typing import Any from llama_stack.models.llama.llama3.chat_format import ChatFormat as Llama3ChatFormat from llama_stack.models.llama.llama4.chat_format import ChatFormat as Llama4ChatFormat @@ -82,7 +83,7 @@ class LlamaModelParallelGenerator: def completion( self, - request_batch: List[CompletionRequestWithRawContent], + request_batch: list[CompletionRequestWithRawContent], ) -> Generator: req_obj = deepcopy(request_batch) gen = self.group.run_inference(("completion", req_obj)) @@ -90,7 +91,7 @@ class LlamaModelParallelGenerator: def chat_completion( self, - request_batch: List[ChatCompletionRequestWithRawContent], + request_batch: list[ChatCompletionRequestWithRawContent], ) -> Generator: req_obj = deepcopy(request_batch) gen = self.group.run_inference(("chat_completion", req_obj)) diff --git a/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py b/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py index 8752f06f3..97e96b929 100644 --- a/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +++ b/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py @@ -18,8 +18,9 @@ import os import tempfile import time import uuid +from collections.abc import Callable, Generator from enum import Enum -from typing import Callable, Generator, List, Literal, Optional, Tuple, Union +from typing import Annotated, Literal import torch import zmq @@ -30,7 +31,6 @@ from fairscale.nn.model_parallel.initialize import ( ) from pydantic import BaseModel, Field from torch.distributed.launcher.api import LaunchConfig, elastic_launch -from typing_extensions import Annotated from llama_stack.models.llama.datatypes import GenerationResult from llama_stack.providers.utils.inference.prompt_adapter import ( @@ -69,12 +69,15 @@ class CancelSentinel(BaseModel): class TaskRequest(BaseModel): type: Literal[ProcessingMessageName.task_request] = ProcessingMessageName.task_request - task: Tuple[str, List[CompletionRequestWithRawContent] | List[ChatCompletionRequestWithRawContent]] + task: tuple[ + str, + list[CompletionRequestWithRawContent] | list[ChatCompletionRequestWithRawContent], + ] class TaskResponse(BaseModel): type: Literal[ProcessingMessageName.task_response] = ProcessingMessageName.task_response - result: List[GenerationResult] + result: list[GenerationResult] class ExceptionResponse(BaseModel): @@ -82,15 +85,9 @@ class ExceptionResponse(BaseModel): error: str -ProcessingMessage = Union[ - ReadyRequest, - ReadyResponse, - EndSentinel, - CancelSentinel, - TaskRequest, - TaskResponse, - ExceptionResponse, -] +ProcessingMessage = ( + ReadyRequest | ReadyResponse | EndSentinel | CancelSentinel | TaskRequest | TaskResponse | ExceptionResponse +) class ProcessingMessageWrapper(BaseModel): @@ -200,7 +197,7 @@ def maybe_get_work(sock: zmq.Socket): return client_id, message -def maybe_parse_message(maybe_json: Optional[str]) -> Optional[ProcessingMessage]: +def maybe_parse_message(maybe_json: str | None) -> ProcessingMessage | None: if maybe_json is None: return None try: @@ -231,10 +228,10 @@ def worker_process_entrypoint( while True: try: task = req_gen.send(result) - if isinstance(task, str) and task == EndSentinel(): + if isinstance(task, EndSentinel): break - assert 
isinstance(task, TaskRequest) + assert isinstance(task, TaskRequest), task result = model(task.task) except StopIteration: break @@ -331,7 +328,10 @@ class ModelParallelProcessGroup: def run_inference( self, - req: Tuple[str, List[CompletionRequestWithRawContent] | List[ChatCompletionRequestWithRawContent]], + req: tuple[ + str, + list[CompletionRequestWithRawContent] | list[ChatCompletionRequestWithRawContent], + ], ) -> Generator: assert not self.running, "inference already running" diff --git a/llama_stack/providers/inline/inference/sentence_transformers/__init__.py b/llama_stack/providers/inline/inference/sentence_transformers/__init__.py index c1d65d10c..1719cbacc 100644 --- a/llama_stack/providers/inline/inference/sentence_transformers/__init__.py +++ b/llama_stack/providers/inline/inference/sentence_transformers/__init__.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict +from typing import Any from llama_stack.providers.inline.inference.sentence_transformers.config import ( SentenceTransformersInferenceConfig, @@ -13,7 +13,7 @@ from llama_stack.providers.inline.inference.sentence_transformers.config import async def get_provider_impl( config: SentenceTransformersInferenceConfig, - _deps: Dict[str, Any], + _deps: dict[str, Any], ): from .sentence_transformers import SentenceTransformersInferenceImpl diff --git a/llama_stack/providers/inline/inference/sentence_transformers/config.py b/llama_stack/providers/inline/inference/sentence_transformers/config.py index 93e0afe11..b03010b10 100644 --- a/llama_stack/providers/inline/inference/sentence_transformers/config.py +++ b/llama_stack/providers/inline/inference/sentence_transformers/config.py @@ -4,12 +4,12 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict +from typing import Any from pydantic import BaseModel class SentenceTransformersInferenceConfig(BaseModel): @classmethod - def sample_run_config(cls, **kwargs) -> Dict[str, Any]: + def sample_run_config(cls, **kwargs) -> dict[str, Any]: return {} diff --git a/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py b/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py index 5bc20e3c2..890c526f5 100644 --- a/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +++ b/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py @@ -5,11 +5,11 @@ # the root directory of this source tree. 
import logging -from typing import AsyncGenerator, List, Optional, Union +from collections.abc import AsyncGenerator from llama_stack.apis.inference import ( CompletionResponse, - Inference, + InferenceProvider, InterleavedContent, LogProbConfig, Message, @@ -25,8 +25,8 @@ from llama_stack.providers.utils.inference.embedding_mixin import ( SentenceTransformerEmbeddingMixin, ) from llama_stack.providers.utils.inference.openai_compat import ( - OpenAIChatCompletionUnsupportedMixin, - OpenAICompletionUnsupportedMixin, + OpenAIChatCompletionToLlamaStackMixin, + OpenAICompletionToLlamaStackMixin, ) from .config import SentenceTransformersInferenceConfig @@ -35,10 +35,10 @@ log = logging.getLogger(__name__) class SentenceTransformersInferenceImpl( - OpenAIChatCompletionUnsupportedMixin, - OpenAICompletionUnsupportedMixin, + OpenAIChatCompletionToLlamaStackMixin, + OpenAICompletionToLlamaStackMixin, SentenceTransformerEmbeddingMixin, - Inference, + InferenceProvider, ModelsProtocolPrivate, ): def __init__(self, config: SentenceTransformersInferenceConfig) -> None: @@ -60,46 +60,46 @@ class SentenceTransformersInferenceImpl( self, model_id: str, content: str, - sampling_params: Optional[SamplingParams] = None, - response_format: Optional[ResponseFormat] = None, - stream: Optional[bool] = False, - logprobs: Optional[LogProbConfig] = None, - ) -> Union[CompletionResponse, AsyncGenerator]: + sampling_params: SamplingParams | None = None, + response_format: ResponseFormat | None = None, + stream: bool | None = False, + logprobs: LogProbConfig | None = None, + ) -> CompletionResponse | AsyncGenerator: raise ValueError("Sentence transformers don't support completion") async def chat_completion( self, model_id: str, - messages: List[Message], - sampling_params: Optional[SamplingParams] = None, - response_format: Optional[ResponseFormat] = None, - tools: Optional[List[ToolDefinition]] = None, - tool_choice: Optional[ToolChoice] = ToolChoice.auto, - tool_prompt_format: Optional[ToolPromptFormat] = None, - stream: Optional[bool] = False, - logprobs: Optional[LogProbConfig] = None, - tool_config: Optional[ToolConfig] = None, + messages: list[Message], + sampling_params: SamplingParams | None = None, + response_format: ResponseFormat | None = None, + tools: list[ToolDefinition] | None = None, + tool_choice: ToolChoice | None = ToolChoice.auto, + tool_prompt_format: ToolPromptFormat | None = None, + stream: bool | None = False, + logprobs: LogProbConfig | None = None, + tool_config: ToolConfig | None = None, ) -> AsyncGenerator: raise ValueError("Sentence transformers don't support chat completion") async def batch_completion( self, model_id: str, - content_batch: List[InterleavedContent], - sampling_params: Optional[SamplingParams] = None, - response_format: Optional[ResponseFormat] = None, - logprobs: Optional[LogProbConfig] = None, + content_batch: list[InterleavedContent], + sampling_params: SamplingParams | None = None, + response_format: ResponseFormat | None = None, + logprobs: LogProbConfig | None = None, ): raise NotImplementedError("Batch completion is not supported for Sentence Transformers") async def batch_chat_completion( self, model_id: str, - messages_batch: List[List[Message]], - sampling_params: Optional[SamplingParams] = None, - tools: Optional[List[ToolDefinition]] = None, - tool_config: Optional[ToolConfig] = None, - response_format: Optional[ResponseFormat] = None, - logprobs: Optional[LogProbConfig] = None, + messages_batch: list[list[Message]], + sampling_params: SamplingParams | None = 
None, + tools: list[ToolDefinition] | None = None, + tool_config: ToolConfig | None = None, + response_format: ResponseFormat | None = None, + logprobs: LogProbConfig | None = None, ): raise NotImplementedError("Batch chat completion is not supported for Sentence Transformers") diff --git a/llama_stack/providers/inline/inference/vllm/__init__.py b/llama_stack/providers/inline/inference/vllm/__init__.py index bd0551e57..d0ec3e084 100644 --- a/llama_stack/providers/inline/inference/vllm/__init__.py +++ b/llama_stack/providers/inline/inference/vllm/__init__.py @@ -4,12 +4,12 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict +from typing import Any from .config import VLLMConfig -async def get_provider_impl(config: VLLMConfig, _deps: Dict[str, Any]): +async def get_provider_impl(config: VLLMConfig, _deps: dict[str, Any]): from .vllm import VLLMInferenceImpl impl = VLLMInferenceImpl(config) diff --git a/llama_stack/providers/inline/inference/vllm/config.py b/llama_stack/providers/inline/inference/vllm/config.py index 51d48e6d5..ce8743c74 100644 --- a/llama_stack/providers/inline/inference/vllm/config.py +++ b/llama_stack/providers/inline/inference/vllm/config.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict +from typing import Any from pydantic import BaseModel, Field @@ -42,7 +42,7 @@ class VLLMConfig(BaseModel): ) @classmethod - def sample_run_config(cls, **kwargs: Any) -> Dict[str, Any]: + def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]: return { "tensor_parallel_size": "${env.TENSOR_PARALLEL_SIZE:1}", "max_tokens": "${env.MAX_TOKENS:4096}", diff --git a/llama_stack/providers/inline/inference/vllm/openai_utils.py b/llama_stack/providers/inline/inference/vllm/openai_utils.py index d34f5ad5f..77cbf0403 100644 --- a/llama_stack/providers/inline/inference/vllm/openai_utils.py +++ b/llama_stack/providers/inline/inference/vllm/openai_utils.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import List, Optional import vllm @@ -55,8 +54,8 @@ def _merge_context_into_content(message: Message) -> Message: # type: ignore def _llama_stack_tools_to_openai_tools( - tools: Optional[List[ToolDefinition]] = None, -) -> List[vllm.entrypoints.openai.protocol.ChatCompletionToolsParam]: + tools: list[ToolDefinition] | None = None, +) -> list[vllm.entrypoints.openai.protocol.ChatCompletionToolsParam]: """ Convert the list of available tools from Llama Stack's format to vLLM's version of OpenAI's format. 
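The bulk of the edits in these inference files is a mechanical typing cleanup: typing.Optional, Union, List, and Dict are replaced with PEP 604 unions and builtin generics, and AsyncGenerator/Callable/Mapping are imported from collections.abc. A minimal sketch of the target style on Python 3.10+, using an illustrative function that is not part of the patch:

# Illustrative only: the annotation style this patch converges on.
from collections.abc import AsyncGenerator


# Before: def lookup(ids: Optional[List[str]] = None) -> Union[Dict[str, str], None]
def lookup(ids: list[str] | None = None) -> dict[str, str] | None:
    """Hypothetical helper; only the annotations mirror the patch."""
    if not ids:
        return None
    return {item: item.upper() for item in ids}


async def stream(ids: list[str]) -> AsyncGenerator[str, None]:
    """AsyncGenerator now comes from collections.abc, as in the modules above."""
    for item in ids:
        yield item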
diff --git a/llama_stack/providers/inline/inference/vllm/vllm.py b/llama_stack/providers/inline/inference/vllm/vllm.py index 085c79d6b..bf54462b5 100644 --- a/llama_stack/providers/inline/inference/vllm/vllm.py +++ b/llama_stack/providers/inline/inference/vllm/vllm.py @@ -7,7 +7,7 @@ import json import re import uuid -from typing import AsyncGenerator, AsyncIterator, Dict, List, Optional, Union +from collections.abc import AsyncGenerator, AsyncIterator # These vLLM modules contain names that overlap with Llama Stack names, so we import # fully-qualified names @@ -40,6 +40,7 @@ from llama_stack.apis.inference import ( JsonSchemaResponseFormat, LogProbConfig, Message, + OpenAIEmbeddingsResponse, ResponseFormat, SamplingParams, TextTruncation, @@ -66,10 +67,10 @@ from llama_stack.providers.utils.inference.model_registry import ( ModelsProtocolPrivate, ) from llama_stack.providers.utils.inference.openai_compat import ( - OpenAIChatCompletionUnsupportedMixin, + OpenAIChatCompletionToLlamaStackMixin, OpenAICompatCompletionChoice, OpenAICompatCompletionResponse, - OpenAICompletionUnsupportedMixin, + OpenAICompletionToLlamaStackMixin, get_stop_reason, process_chat_completion_stream_response, ) @@ -100,7 +101,7 @@ def _random_uuid_str() -> str: def _response_format_to_guided_decoding_params( - response_format: Optional[ResponseFormat], # type: ignore + response_format: ResponseFormat | None, # type: ignore ) -> vllm.sampling_params.GuidedDecodingParams: """ Translate constrained decoding parameters from Llama Stack's format to vLLM's format. @@ -131,9 +132,9 @@ def _response_format_to_guided_decoding_params( def _convert_sampling_params( - sampling_params: Optional[SamplingParams], - response_format: Optional[ResponseFormat], # type: ignore - log_prob_config: Optional[LogProbConfig], + sampling_params: SamplingParams | None, + response_format: ResponseFormat | None, # type: ignore + log_prob_config: LogProbConfig | None, ) -> vllm.SamplingParams: """Convert sampling and constrained decoding configuration from Llama Stack's format to vLLM's format.""" @@ -176,8 +177,8 @@ def _convert_sampling_params( class VLLMInferenceImpl( Inference, - OpenAIChatCompletionUnsupportedMixin, - OpenAICompletionUnsupportedMixin, + OpenAIChatCompletionToLlamaStackMixin, + OpenAICompletionToLlamaStackMixin, ModelsProtocolPrivate, ): """ @@ -370,11 +371,11 @@ class VLLMInferenceImpl( self, model_id: str, content: InterleavedContent, - sampling_params: Optional[SamplingParams] = None, - response_format: Optional[ResponseFormat] = None, - stream: Optional[bool] = False, - logprobs: Optional[LogProbConfig] = None, - ) -> Union[CompletionResponse, AsyncIterator[CompletionResponseStreamChunk]]: + sampling_params: SamplingParams | None = None, + response_format: ResponseFormat | None = None, + stream: bool | None = False, + logprobs: LogProbConfig | None = None, + ) -> CompletionResponse | AsyncIterator[CompletionResponseStreamChunk]: if model_id not in self.model_ids: raise ValueError( f"This adapter is not registered to model id '{model_id}'. 
Registered IDs are: {self.model_ids}" @@ -403,25 +404,35 @@ class VLLMInferenceImpl( async def embeddings( self, model_id: str, - contents: List[str] | List[InterleavedContentItem], - text_truncation: Optional[TextTruncation] = TextTruncation.none, - output_dimension: Optional[int] = None, - task_type: Optional[EmbeddingTaskType] = None, + contents: list[str] | list[InterleavedContentItem], + text_truncation: TextTruncation | None = TextTruncation.none, + output_dimension: int | None = None, + task_type: EmbeddingTaskType | None = None, ) -> EmbeddingsResponse: raise NotImplementedError() + async def openai_embeddings( + self, + model: str, + input: str | list[str], + encoding_format: str | None = "float", + dimensions: int | None = None, + user: str | None = None, + ) -> OpenAIEmbeddingsResponse: + raise NotImplementedError() + async def chat_completion( self, model_id: str, - messages: List[Message], # type: ignore - sampling_params: Optional[SamplingParams] = None, - response_format: Optional[ResponseFormat] = None, # type: ignore - tools: Optional[List[ToolDefinition]] = None, - tool_choice: Optional[ToolChoice] = ToolChoice.auto, - tool_prompt_format: Optional[ToolPromptFormat] = None, - stream: Optional[bool] = False, - logprobs: Optional[LogProbConfig] = None, - tool_config: Optional[ToolConfig] = None, + messages: list[Message], # type: ignore + sampling_params: SamplingParams | None = None, + response_format: ResponseFormat | None = None, # type: ignore + tools: list[ToolDefinition] | None = None, + tool_choice: ToolChoice | None = ToolChoice.auto, + tool_prompt_format: ToolPromptFormat | None = None, + stream: bool | None = False, + logprobs: LogProbConfig | None = None, + tool_config: ToolConfig | None = None, ) -> ChatCompletionResponse | ChatCompletionResponseStreamChunk: sampling_params = sampling_params or SamplingParams() if model_id not in self.model_ids: @@ -605,7 +616,7 @@ class VLLMInferenceImpl( async def _chat_completion_for_meta_llama( self, request: ChatCompletionRequest - ) -> Union[ChatCompletionResponse, AsyncIterator[ChatCompletionResponseStreamChunk]]: + ) -> ChatCompletionResponse | AsyncIterator[ChatCompletionResponseStreamChunk]: """ Subroutine that routes chat completions for Meta Llama models through Llama Stack's chat template instead of using vLLM's version of that template. The Llama Stack version @@ -701,7 +712,7 @@ class VLLMInferenceImpl( # Tool calls come in pieces, but Llama Stack expects them in bigger chunks. We build up # those chunks and output them at the end. # This data structure holds the current set of partial tool calls. - index_to_tool_call: Dict[int, Dict] = dict() + index_to_tool_call: dict[int, dict] = dict() # The Llama Stack event stream must always start with a start event. Use an empty one to # simplify logic below diff --git a/llama_stack/providers/inline/post_training/common/utils.py b/llama_stack/providers/inline/post_training/common/utils.py new file mode 100644 index 000000000..7840b21e8 --- /dev/null +++ b/llama_stack/providers/inline/post_training/common/utils.py @@ -0,0 +1,35 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import gc + + +def evacuate_model_from_device(model, device: str): + """Safely clear a model from memory and free device resources. + This function handles the proper cleanup of a model by: + 1. 
Moving the model to CPU if it's on a non-CPU device + 2. Deleting the model object to free memory + 3. Running garbage collection + 4. Clearing CUDA cache if the model was on a CUDA device + Args: + model: The PyTorch model to clear + device: The device type the model is currently on ('cuda', 'mps', 'cpu') + Note: + - For CUDA devices, this will clear the CUDA cache after moving the model to CPU + - For MPS devices, only moves the model to CPU (no cache clearing available) + - For CPU devices, only deletes the model object and runs garbage collection + """ + if device != "cpu": + model.to("cpu") + + del model + gc.collect() + + if device == "cuda": + # we need to import such that this is only imported when the method is called + import torch + + torch.cuda.empty_cache() diff --git a/llama_stack/providers/inline/post_training/common/validator.py b/llama_stack/providers/inline/post_training/common/validator.py index b0aec6187..950b75f86 100644 --- a/llama_stack/providers/inline/post_training/common/validator.py +++ b/llama_stack/providers/inline/post_training/common/validator.py @@ -17,10 +17,8 @@ from llama_stack.apis.common.type_system import ( DialogType, StringType, ) -from llama_stack.apis.datasets import Datasets from llama_stack.providers.utils.common.data_schema_validator import ( ColumnName, - validate_dataset_schema, ) EXPECTED_DATASET_SCHEMA: dict[str, list[dict[str, Any]]] = { @@ -36,21 +34,3 @@ EXPECTED_DATASET_SCHEMA: dict[str, list[dict[str, Any]]] = { } ], } - - -async def validate_input_dataset_schema( - datasets_api: Datasets, - dataset_id: str, - dataset_type: str, -) -> None: - dataset_def = await datasets_api.get_dataset(dataset_id=dataset_id) - if not dataset_def: - raise ValueError(f"Dataset {dataset_id} does not exist.") - - if not dataset_def.dataset_schema or len(dataset_def.dataset_schema) == 0: - raise ValueError(f"Dataset {dataset_id} does not have a schema defined.") - - if dataset_type not in EXPECTED_DATASET_SCHEMA: - raise ValueError(f"Dataset type {dataset_type} is not supported.") - - validate_dataset_schema(dataset_def.dataset_schema, EXPECTED_DATASET_SCHEMA[dataset_type]) diff --git a/llama_stack/providers/inline/post_training/huggingface/__init__.py b/llama_stack/providers/inline/post_training/huggingface/__init__.py new file mode 100644 index 000000000..cc1a671c1 --- /dev/null +++ b/llama_stack/providers/inline/post_training/huggingface/__init__.py @@ -0,0 +1,27 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Any + +from llama_stack.distribution.datatypes import Api + +from .config import HuggingFacePostTrainingConfig + +# post_training api and the huggingface provider is still experimental and under heavy development + + +async def get_provider_impl( + config: HuggingFacePostTrainingConfig, + deps: dict[Api, Any], +): + from .post_training import HuggingFacePostTrainingImpl + + impl = HuggingFacePostTrainingImpl( + config, + deps[Api.datasetio], + deps[Api.datasets], + ) + return impl diff --git a/llama_stack/providers/inline/post_training/huggingface/config.py b/llama_stack/providers/inline/post_training/huggingface/config.py new file mode 100644 index 000000000..06c6d8073 --- /dev/null +++ b/llama_stack/providers/inline/post_training/huggingface/config.py @@ -0,0 +1,72 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. 
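The new evacuate_model_from_device helper centralizes the teardown logic (move to CPU, drop the reference, collect garbage, empty the CUDA cache) that the recipes previously open-coded. A small usage sketch, assuming torch is installed; the Linear module stands in for a fine-tuned model:

# Illustrative use of the helper added in llama_stack/providers/inline/post_training/common/utils.py
import torch

from llama_stack.providers.inline.post_training.common.utils import evacuate_model_from_device

device = "cuda" if torch.cuda.is_available() else "cpu"
model = torch.nn.Linear(8, 8).to(device)  # stand-in for a fine-tuned model

# ... training or evaluation would happen here ...

# Moves the model to CPU (when needed), deletes it, runs gc, and clears the CUDA cache on "cuda".
evacuate_model_from_device(model, device)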
+# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Any, Literal + +from pydantic import BaseModel + + +class HuggingFacePostTrainingConfig(BaseModel): + # Device to run training on (cuda, cpu, mps) + device: str = "cuda" + + # Distributed training backend if using multiple devices + # fsdp: Fully Sharded Data Parallel + # deepspeed: DeepSpeed ZeRO optimization + distributed_backend: Literal["fsdp", "deepspeed"] | None = None + + # Format for saving model checkpoints + # full_state: Save complete model state + # huggingface: Save in HuggingFace format (recommended for compatibility) + checkpoint_format: Literal["full_state", "huggingface"] | None = "huggingface" + + # Template for formatting chat inputs and outputs + # Used to structure the conversation format for training + chat_template: str = "<|user|>\n{input}\n<|assistant|>\n{output}" + + # Model-specific configuration parameters + # trust_remote_code: Allow execution of custom model code + # attn_implementation: Use SDPA (Scaled Dot Product Attention) for better performance + model_specific_config: dict = { + "trust_remote_code": True, + "attn_implementation": "sdpa", + } + + # Maximum sequence length for training + # Set to 2048 as this is the maximum that works reliably on MPS (Apple Silicon) + # Longer sequences may cause memory issues on MPS devices + max_seq_length: int = 2048 + + # Enable gradient checkpointing to reduce memory usage + # Trades computation for memory by recomputing activations + gradient_checkpointing: bool = False + + # Maximum number of checkpoints to keep + # Older checkpoints are deleted when this limit is reached + save_total_limit: int = 3 + + # Number of training steps between logging updates + logging_steps: int = 10 + + # Ratio of training steps used for learning rate warmup + # Helps stabilize early training + warmup_ratio: float = 0.1 + + # L2 regularization coefficient + # Helps prevent overfitting + weight_decay: float = 0.01 + + # Number of worker processes for data loading + # Higher values can improve data loading speed but increase memory usage + dataloader_num_workers: int = 4 + + # Whether to pin memory in data loader + # Can improve data transfer speed to GPU but uses more memory + dataloader_pin_memory: bool = True + + @classmethod + def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: + return {"checkpoint_format": "huggingface", "distributed_backend": None, "device": "cpu"} diff --git a/llama_stack/providers/inline/post_training/huggingface/post_training.py b/llama_stack/providers/inline/post_training/huggingface/post_training.py new file mode 100644 index 000000000..0b2760792 --- /dev/null +++ b/llama_stack/providers/inline/post_training/huggingface/post_training.py @@ -0,0 +1,176 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
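The HuggingFace provider config above carries both runtime knobs (device, max_seq_length, dataloader settings) and the chat_template used to flatten an input/output pair into a single training string. A short sketch of constructing it with the CPU-friendly values from sample_run_config and rendering one example; the prompt text is illustrative:

# Illustrative only: exercising the config defined above.
from llama_stack.providers.inline.post_training.huggingface.config import (
    HuggingFacePostTrainingConfig,
)

cfg = HuggingFacePostTrainingConfig(
    device="cpu",
    checkpoint_format="huggingface",
    distributed_backend=None,
)

# chat_template defaults to "<|user|>\n{input}\n<|assistant|>\n{output}"
example = cfg.chat_template.format(input="What is 2 + 2?", output="4")
print(example)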
+from enum import Enum +from typing import Any + +from llama_stack.apis.datasetio import DatasetIO +from llama_stack.apis.datasets import Datasets +from llama_stack.apis.post_training import ( + AlgorithmConfig, + Checkpoint, + DPOAlignmentConfig, + JobStatus, + ListPostTrainingJobsResponse, + PostTrainingJob, + PostTrainingJobArtifactsResponse, + PostTrainingJobStatusResponse, + TrainingConfig, +) +from llama_stack.providers.inline.post_training.huggingface.config import ( + HuggingFacePostTrainingConfig, +) +from llama_stack.providers.inline.post_training.huggingface.recipes.finetune_single_device import ( + HFFinetuningSingleDevice, +) +from llama_stack.providers.utils.scheduler import JobArtifact, Scheduler +from llama_stack.providers.utils.scheduler import JobStatus as SchedulerJobStatus +from llama_stack.schema_utils import webmethod + + +class TrainingArtifactType(Enum): + CHECKPOINT = "checkpoint" + RESOURCES_STATS = "resources_stats" + + +_JOB_TYPE_SUPERVISED_FINE_TUNE = "supervised-fine-tune" + + +class HuggingFacePostTrainingImpl: + def __init__( + self, + config: HuggingFacePostTrainingConfig, + datasetio_api: DatasetIO, + datasets: Datasets, + ) -> None: + self.config = config + self.datasetio_api = datasetio_api + self.datasets_api = datasets + self._scheduler = Scheduler() + + async def shutdown(self) -> None: + await self._scheduler.shutdown() + + @staticmethod + def _checkpoint_to_artifact(checkpoint: Checkpoint) -> JobArtifact: + return JobArtifact( + type=TrainingArtifactType.CHECKPOINT.value, + name=checkpoint.identifier, + uri=checkpoint.path, + metadata=dict(checkpoint), + ) + + @staticmethod + def _resources_stats_to_artifact(resources_stats: dict[str, Any]) -> JobArtifact: + return JobArtifact( + type=TrainingArtifactType.RESOURCES_STATS.value, + name=TrainingArtifactType.RESOURCES_STATS.value, + metadata=resources_stats, + ) + + async def supervised_fine_tune( + self, + job_uuid: str, + training_config: TrainingConfig, + hyperparam_search_config: dict[str, Any], + logger_config: dict[str, Any], + model: str, + checkpoint_dir: str | None = None, + algorithm_config: AlgorithmConfig | None = None, + ) -> PostTrainingJob: + async def handler(on_log_message_cb, on_status_change_cb, on_artifact_collected_cb): + on_log_message_cb("Starting HF finetuning") + + recipe = HFFinetuningSingleDevice( + job_uuid=job_uuid, + datasetio_api=self.datasetio_api, + datasets_api=self.datasets_api, + ) + + resources_allocated, checkpoints = await recipe.train( + model=model, + output_dir=checkpoint_dir, + job_uuid=job_uuid, + lora_config=algorithm_config, + config=training_config, + provider_config=self.config, + ) + + on_artifact_collected_cb(self._resources_stats_to_artifact(resources_allocated)) + if checkpoints: + for checkpoint in checkpoints: + artifact = self._checkpoint_to_artifact(checkpoint) + on_artifact_collected_cb(artifact) + + on_status_change_cb(SchedulerJobStatus.completed) + on_log_message_cb("HF finetuning completed") + + job_uuid = self._scheduler.schedule(_JOB_TYPE_SUPERVISED_FINE_TUNE, job_uuid, handler) + return PostTrainingJob(job_uuid=job_uuid) + + async def preference_optimize( + self, + job_uuid: str, + finetuned_model: str, + algorithm_config: DPOAlignmentConfig, + training_config: TrainingConfig, + hyperparam_search_config: dict[str, Any], + logger_config: dict[str, Any], + ) -> PostTrainingJob: + raise NotImplementedError("DPO alignment is not implemented yet") + + async def get_training_jobs(self) -> ListPostTrainingJobsResponse: + return 
ListPostTrainingJobsResponse( + data=[PostTrainingJob(job_uuid=job.id) for job in self._scheduler.get_jobs()] + ) + + @staticmethod + def _get_artifacts_metadata_by_type(job, artifact_type): + return [artifact.metadata for artifact in job.artifacts if artifact.type == artifact_type] + + @classmethod + def _get_checkpoints(cls, job): + return cls._get_artifacts_metadata_by_type(job, TrainingArtifactType.CHECKPOINT.value) + + @classmethod + def _get_resources_allocated(cls, job): + data = cls._get_artifacts_metadata_by_type(job, TrainingArtifactType.RESOURCES_STATS.value) + return data[0] if data else None + + @webmethod(route="/post-training/job/status") + async def get_training_job_status(self, job_uuid: str) -> PostTrainingJobStatusResponse | None: + job = self._scheduler.get_job(job_uuid) + + match job.status: + # TODO: Add support for other statuses to API + case SchedulerJobStatus.new | SchedulerJobStatus.scheduled: + status = JobStatus.scheduled + case SchedulerJobStatus.running: + status = JobStatus.in_progress + case SchedulerJobStatus.completed: + status = JobStatus.completed + case SchedulerJobStatus.failed: + status = JobStatus.failed + case _: + raise NotImplementedError() + + return PostTrainingJobStatusResponse( + job_uuid=job_uuid, + status=status, + scheduled_at=job.scheduled_at, + started_at=job.started_at, + completed_at=job.completed_at, + checkpoints=self._get_checkpoints(job), + resources_allocated=self._get_resources_allocated(job), + ) + + @webmethod(route="/post-training/job/cancel") + async def cancel_training_job(self, job_uuid: str) -> None: + self._scheduler.cancel(job_uuid) + + @webmethod(route="/post-training/job/artifacts") + async def get_training_job_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse | None: + job = self._scheduler.get_job(job_uuid) + return PostTrainingJobArtifactsResponse(job_uuid=job_uuid, checkpoints=self._get_checkpoints(job)) diff --git a/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py b/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py new file mode 100644 index 000000000..b6d13b029 --- /dev/null +++ b/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py @@ -0,0 +1,683 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
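HuggingFacePostTrainingImpl delegates job execution to the shared Scheduler: supervised_fine_tune wraps the recipe in an async handler that reports logs, artifacts, and a terminal status through callbacks, then hands it to scheduler.schedule. A stripped-down sketch of that wiring, with an illustrative job body:

# Illustrative sketch of the scheduler pattern used above; the handler body is a placeholder.
from llama_stack.providers.utils.scheduler import JobStatus as SchedulerJobStatus
from llama_stack.providers.utils.scheduler import Scheduler


def schedule_example_job(scheduler: Scheduler, job_uuid: str) -> str:
    async def handler(on_log_message_cb, on_status_change_cb, on_artifact_collected_cb):
        on_log_message_cb("Starting illustrative job")
        # ... real work goes here; checkpoints and stats are reported via on_artifact_collected_cb(...)
        on_status_change_cb(SchedulerJobStatus.completed)
        on_log_message_cb("Illustrative job completed")

    # Same call shape as supervised_fine_tune(); the returned id is later used for status lookups.
    return scheduler.schedule("supervised-fine-tune", job_uuid, handler)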
+ +import gc +import json +import logging +import multiprocessing +import os +import signal +import sys +from datetime import datetime, timezone +from pathlib import Path +from typing import Any + +import psutil + +from llama_stack.providers.inline.post_training.common.utils import evacuate_model_from_device + +# Set tokenizer parallelism environment variable +os.environ["TOKENIZERS_PARALLELISM"] = "false" + +# Force PyTorch to use OpenBLAS instead of MKL +os.environ["MKL_THREADING_LAYER"] = "GNU" +os.environ["MKL_SERVICE_FORCE_INTEL"] = "0" +os.environ["MKL_NUM_THREADS"] = "1" + +import torch +from datasets import Dataset +from peft import LoraConfig +from transformers import ( + AutoConfig, + AutoModelForCausalLM, + AutoTokenizer, +) +from trl import SFTConfig, SFTTrainer + +from llama_stack.apis.datasetio import DatasetIO +from llama_stack.apis.datasets import Datasets +from llama_stack.apis.post_training import ( + Checkpoint, + DataConfig, + LoraFinetuningConfig, + TrainingConfig, +) + +from ..config import HuggingFacePostTrainingConfig + +logger = logging.getLogger(__name__) + + +def get_gb(to_convert: int) -> str: + """Converts memory stats to GB and formats to 2 decimal places. + Args: + to_convert: Memory value in bytes + Returns: + str: Memory value in GB formatted to 2 decimal places + """ + return f"{(to_convert / (1024**3)):.2f}" + + +def get_memory_stats(device: torch.device) -> dict[str, Any]: + """Get memory statistics for the given device.""" + stats = { + "system_memory": { + "total": get_gb(psutil.virtual_memory().total), + "available": get_gb(psutil.virtual_memory().available), + "used": get_gb(psutil.virtual_memory().used), + "percent": psutil.virtual_memory().percent, + } + } + + if device.type == "cuda": + stats["device_memory"] = { + "allocated": get_gb(torch.cuda.memory_allocated(device)), + "reserved": get_gb(torch.cuda.memory_reserved(device)), + "max_allocated": get_gb(torch.cuda.max_memory_allocated(device)), + } + elif device.type == "mps": + # MPS doesn't provide direct memory stats, but we can track system memory + stats["device_memory"] = { + "note": "MPS memory stats not directly available", + "system_memory_used": get_gb(psutil.virtual_memory().used), + } + elif device.type == "cpu": + # For CPU, we track process memory usage + process = psutil.Process() + stats["device_memory"] = { + "process_rss": get_gb(process.memory_info().rss), + "process_vms": get_gb(process.memory_info().vms), + "process_percent": process.memory_percent(), + } + + return stats + + +def setup_torch_device(device_str: str) -> torch.device: + """Initialize and validate a PyTorch device. + This function handles device initialization and validation for different device types: + - CUDA: Validates CUDA availability and handles device selection + - MPS: Validates MPS availability for Apple Silicon + - CPU: Basic validation + - HPU: Raises error as it's not supported + Args: + device_str: String specifying the device ('cuda', 'cpu', 'mps') + Returns: + torch.device: The initialized and validated device + Raises: + RuntimeError: If device initialization fails or device is not supported + """ + try: + device = torch.device(device_str) + except RuntimeError as e: + raise RuntimeError(f"Error getting Torch Device {str(e)}") from e + + # Validate device capabilities + if device.type == "cuda": + if not torch.cuda.is_available(): + raise RuntimeError( + f"{device.type}: Torch has no CUDA/ROCm support or could not detect a compatible device." 
+ ) + if device.index is None: + device = torch.device(device.type, torch.cuda.current_device()) + elif device.type == "mps": + if not torch.backends.mps.is_available(): + raise RuntimeError(f"{device.type}: Torch has no MPS support or could not detect a compatible device.") + elif device.type == "hpu": + raise RuntimeError(f"{device.type}: training does not support Intel Gaudi.") + + return device + + +class HFFinetuningSingleDevice: + def __init__( + self, + job_uuid: str, + datasetio_api: DatasetIO, + datasets_api: Datasets, + ): + self.datasetio_api = datasetio_api + self.datasets_api = datasets_api + self.job_uuid = job_uuid + + def validate_dataset_format(self, rows: list[dict]) -> bool: + """Validate that the dataset has the required fields.""" + required_fields = ["input_query", "expected_answer", "chat_completion_input"] + return all(field in row for row in rows for field in required_fields) + + def _process_instruct_format(self, row: dict) -> tuple[str | None, str | None]: + """Process a row in instruct format.""" + if "chat_completion_input" in row and "expected_answer" in row: + try: + messages = json.loads(row["chat_completion_input"]) + if not isinstance(messages, list) or len(messages) != 1: + logger.warning(f"Invalid chat_completion_input format: {row['chat_completion_input']}") + return None, None + if "content" not in messages[0]: + logger.warning(f"Message missing content: {messages[0]}") + return None, None + return messages[0]["content"], row["expected_answer"] + except json.JSONDecodeError: + logger.warning(f"Failed to parse chat_completion_input: {row['chat_completion_input']}") + return None, None + return None, None + + def _process_dialog_format(self, row: dict) -> tuple[str | None, str | None]: + """Process a row in dialog format.""" + if "dialog" in row: + try: + dialog = json.loads(row["dialog"]) + if not isinstance(dialog, list) or len(dialog) < 2: + logger.warning(f"Dialog must have at least 2 messages: {row['dialog']}") + return None, None + if dialog[0].get("role") != "user": + logger.warning(f"First message must be from user: {dialog[0]}") + return None, None + if not any(msg.get("role") == "assistant" for msg in dialog): + logger.warning("Dialog must have at least one assistant message") + return None, None + + # Convert to human/gpt format + role_map = {"user": "human", "assistant": "gpt"} + conversations = [] + for msg in dialog: + if "role" not in msg or "content" not in msg: + logger.warning(f"Message missing role or content: {msg}") + continue + conversations.append({"from": role_map[msg["role"]], "value": msg["content"]}) + + # Format as a single conversation + return conversations[0]["value"], conversations[1]["value"] + except json.JSONDecodeError: + logger.warning(f"Failed to parse dialog: {row['dialog']}") + return None, None + return None, None + + def _process_fallback_format(self, row: dict) -> tuple[str | None, str | None]: + """Process a row using fallback formats.""" + if "input" in row and "output" in row: + return row["input"], row["output"] + elif "prompt" in row and "completion" in row: + return row["prompt"], row["completion"] + elif "question" in row and "answer" in row: + return row["question"], row["answer"] + return None, None + + def _format_text(self, input_text: str, output_text: str, provider_config: HuggingFacePostTrainingConfig) -> str: + """Format input and output text based on model requirements.""" + if hasattr(provider_config, "chat_template"): + return provider_config.chat_template.format(input=input_text, 
output=output_text) + return f"{input_text}\n{output_text}" + + def _create_dataset( + self, rows: list[dict], config: TrainingConfig, provider_config: HuggingFacePostTrainingConfig + ) -> Dataset: + """Create and preprocess the dataset.""" + formatted_rows = [] + for row in rows: + input_text = None + output_text = None + + # Process based on format + assert isinstance(config.data_config, DataConfig), "DataConfig must be initialized" + if config.data_config.data_format.value == "instruct": + input_text, output_text = self._process_instruct_format(row) + elif config.data_config.data_format.value == "dialog": + input_text, output_text = self._process_dialog_format(row) + else: + input_text, output_text = self._process_fallback_format(row) + + if input_text and output_text: + formatted_text = self._format_text(input_text, output_text, provider_config) + formatted_rows.append({"text": formatted_text}) + + if not formatted_rows: + assert isinstance(config.data_config, DataConfig), "DataConfig must be initialized" + raise ValueError( + f"No valid input/output pairs found in the dataset for format: {config.data_config.data_format.value}" + ) + + return Dataset.from_list(formatted_rows) + + def _preprocess_dataset( + self, ds: Dataset, tokenizer: AutoTokenizer, provider_config: HuggingFacePostTrainingConfig + ) -> Dataset: + """Preprocess the dataset with tokenizer.""" + + def tokenize_function(examples): + return tokenizer( + examples["text"], + padding=True, + truncation=True, + max_length=provider_config.max_seq_length, + return_tensors=None, + ) + + return ds.map( + tokenize_function, + batched=True, + remove_columns=ds.column_names, + ) + + async def _setup_data(self, dataset_id: str) -> list[dict[str, Any]]: + """Load dataset from llama stack dataset provider""" + try: + all_rows = await self.datasetio_api.iterrows( + dataset_id=dataset_id, + limit=-1, + ) + if not isinstance(all_rows.data, list): + raise RuntimeError("Expected dataset data to be a list") + return all_rows.data + except Exception as e: + raise RuntimeError(f"Failed to load dataset: {str(e)}") from e + + def _run_training_sync( + self, + model: str, + provider_config: dict[str, Any], + peft_config: LoraConfig | None, + config: dict[str, Any], + output_dir_path: Path | None, + ) -> None: + """Synchronous wrapper for running training process. + This method serves as a bridge between the multiprocessing Process and the async training function. + It creates a new event loop to run the async training process. + Args: + model: The model identifier to load + dataset_id: ID of the dataset to use for training + provider_config: Configuration specific to the HuggingFace provider + peft_config: Optional LoRA configuration + config: General training configuration + output_dir_path: Optional path to save the model + """ + import asyncio + + logger.info("Starting training process with async wrapper") + asyncio.run( + self._run_training( + model=model, + provider_config=provider_config, + peft_config=peft_config, + config=config, + output_dir_path=output_dir_path, + ) + ) + + async def load_dataset( + self, + model: str, + config: TrainingConfig, + provider_config: HuggingFacePostTrainingConfig, + ) -> tuple[Dataset, Dataset, AutoTokenizer]: + """Load and prepare the dataset for training. 
+ Args: + model: The model identifier to load + config: Training configuration + provider_config: Provider-specific configuration + Returns: + tuple: (train_dataset, eval_dataset, tokenizer) + """ + # Validate data config + if not config.data_config: + raise ValueError("DataConfig is required for training") + + # Load dataset + logger.info(f"Loading dataset: {config.data_config.dataset_id}") + rows = await self._setup_data(config.data_config.dataset_id) + if not self.validate_dataset_format(rows): + raise ValueError("Dataset is missing required fields: input_query, expected_answer, chat_completion_input") + logger.info(f"Loaded {len(rows)} rows from dataset") + + # Initialize tokenizer + logger.info(f"Initializing tokenizer for model: {model}") + try: + tokenizer = AutoTokenizer.from_pretrained(model, **provider_config.model_specific_config) + + # Set pad token to eos token if not present + # This is common for models that don't have a dedicated pad token + if not tokenizer.pad_token: + tokenizer.pad_token = tokenizer.eos_token + + # Set padding side to right for causal language modeling + # This ensures that padding tokens don't interfere with the model's ability + # to predict the next token in the sequence + tokenizer.padding_side = "right" + + # Set truncation side to right to keep the beginning of the sequence + # This is important for maintaining context and instruction format + tokenizer.truncation_side = "right" + + # Set model max length to match provider config + # This ensures consistent sequence lengths across the training process + tokenizer.model_max_length = provider_config.max_seq_length + + logger.info("Tokenizer initialized successfully") + except Exception as e: + raise RuntimeError(f"Failed to initialize tokenizer: {str(e)}") from e + + # Create and preprocess dataset + logger.info("Creating and preprocessing dataset") + try: + ds = self._create_dataset(rows, config, provider_config) + ds = self._preprocess_dataset(ds, tokenizer, provider_config) + logger.info(f"Dataset created with {len(ds)} examples") + except Exception as e: + raise ValueError(f"Failed to create dataset: {str(e)}") from e + + # Split dataset + logger.info("Splitting dataset into train and validation sets") + train_val_split = ds.train_test_split(test_size=0.1, seed=42) + train_dataset = train_val_split["train"] + eval_dataset = train_val_split["test"] + logger.info(f"Split dataset into {len(train_dataset)} training and {len(eval_dataset)} validation examples") + + return train_dataset, eval_dataset, tokenizer + + def load_model( + self, + model: str, + device: torch.device, + provider_config: HuggingFacePostTrainingConfig, + ) -> AutoModelForCausalLM: + """Load and initialize the model for training. 
+ Args: + model: The model identifier to load + device: The device to load the model onto + provider_config: Provider-specific configuration + Returns: + The loaded and initialized model + Raises: + RuntimeError: If model loading fails + """ + logger.info("Loading the base model") + try: + model_config = AutoConfig.from_pretrained(model, **provider_config.model_specific_config) + model_obj = AutoModelForCausalLM.from_pretrained( + model, + torch_dtype="auto" if device.type != "cpu" else "float32", + quantization_config=None, + config=model_config, + **provider_config.model_specific_config, + ) + # Always move model to specified device + model_obj = model_obj.to(device) + logger.info(f"Model loaded and moved to device: {model_obj.device}") + return model_obj + except Exception as e: + raise RuntimeError(f"Failed to load model: {str(e)}") from e + + def setup_training_args( + self, + config: TrainingConfig, + provider_config: HuggingFacePostTrainingConfig, + device: torch.device, + output_dir_path: Path | None, + steps_per_epoch: int, + ) -> SFTConfig: + """Setup training arguments. + Args: + config: Training configuration + provider_config: Provider-specific configuration + device: The device to train on + output_dir_path: Optional path to save the model + steps_per_epoch: Number of steps per epoch + Returns: + Configured SFTConfig object + """ + logger.info("Configuring training arguments") + lr = 2e-5 + if config.optimizer_config: + lr = config.optimizer_config.lr + logger.info(f"Using custom learning rate: {lr}") + + # Validate data config + if not config.data_config: + raise ValueError("DataConfig is required for training") + data_config = config.data_config + + # Calculate steps + total_steps = steps_per_epoch * config.n_epochs + max_steps = min(config.max_steps_per_epoch, total_steps) + eval_steps = max(1, steps_per_epoch // 10) # Evaluate 10 times per epoch + save_steps = max(1, steps_per_epoch // 5) # Save 5 times per epoch + logging_steps = max(1, steps_per_epoch // 50) # Log 50 times per epoch + + logger.info("Training configuration:") + logger.info(f"- Steps per epoch: {steps_per_epoch}") + logger.info(f"- Total steps: {total_steps}") + logger.info(f"- Max steps: {max_steps}") + logger.info(f"- Eval steps: {eval_steps}") + logger.info(f"- Save steps: {save_steps}") + logger.info(f"- Logging steps: {logging_steps}") + + # Configure save strategy + save_strategy = "no" + if output_dir_path: + save_strategy = "steps" + logger.info(f"Will save checkpoints to {output_dir_path}") + + return SFTConfig( + max_steps=max_steps, + output_dir=str(output_dir_path) if output_dir_path is not None else None, + num_train_epochs=config.n_epochs, + per_device_train_batch_size=data_config.batch_size, + fp16=device.type == "cuda", + bf16=False, # Causes CPU issues. 
+ eval_strategy="steps", + use_cpu=True if device.type == "cpu" and not torch.backends.mps.is_available() else False, + save_strategy=save_strategy, + report_to="none", + max_seq_length=provider_config.max_seq_length, + gradient_accumulation_steps=config.gradient_accumulation_steps, + gradient_checkpointing=provider_config.gradient_checkpointing, + learning_rate=lr, + warmup_ratio=provider_config.warmup_ratio, + weight_decay=provider_config.weight_decay, + remove_unused_columns=False, + dataloader_pin_memory=provider_config.dataloader_pin_memory, + dataloader_num_workers=provider_config.dataloader_num_workers, + dataset_text_field="text", + packing=False, + load_best_model_at_end=True if output_dir_path else False, + metric_for_best_model="eval_loss", + greater_is_better=False, + eval_steps=eval_steps, + save_steps=save_steps, + logging_steps=logging_steps, + ) + + def save_model( + self, + model_obj: AutoModelForCausalLM, + trainer: SFTTrainer, + peft_config: LoraConfig | None, + output_dir_path: Path, + ) -> None: + """Save the trained model. + Args: + model_obj: The model to save + trainer: The trainer instance + peft_config: Optional LoRA configuration + output_dir_path: Path to save the model + """ + logger.info("Saving final model") + model_obj.config.use_cache = True + + if peft_config: + logger.info("Merging LoRA weights with base model") + model_obj = trainer.model.merge_and_unload() + else: + model_obj = trainer.model + + save_path = output_dir_path / "merged_model" + logger.info(f"Saving model to {save_path}") + model_obj.save_pretrained(save_path) + + async def _run_training( + self, + model: str, + provider_config: dict[str, Any], + peft_config: LoraConfig | None, + config: dict[str, Any], + output_dir_path: Path | None, + ) -> None: + """Run the training process with signal handling.""" + + def signal_handler(signum, frame): + """Handle termination signals gracefully.""" + logger.info(f"Received signal {signum}, initiating graceful shutdown") + sys.exit(0) + + signal.signal(signal.SIGTERM, signal_handler) + signal.signal(signal.SIGINT, signal_handler) + + # Convert config dicts back to objects + logger.info("Initializing configuration objects") + provider_config_obj = HuggingFacePostTrainingConfig(**provider_config) + config_obj = TrainingConfig(**config) + + # Initialize and validate device + device = setup_torch_device(provider_config_obj.device) + logger.info(f"Using device '{device}'") + + # Load dataset and tokenizer + train_dataset, eval_dataset, tokenizer = await self.load_dataset(model, config_obj, provider_config_obj) + + # Calculate steps per epoch + if not config_obj.data_config: + raise ValueError("DataConfig is required for training") + steps_per_epoch = len(train_dataset) // config_obj.data_config.batch_size + + # Setup training arguments + training_args = self.setup_training_args( + config_obj, + provider_config_obj, + device, + output_dir_path, + steps_per_epoch, + ) + + # Load model + model_obj = self.load_model(model, device, provider_config_obj) + + # Initialize trainer + logger.info("Initializing SFTTrainer") + trainer = SFTTrainer( + model=model_obj, + train_dataset=train_dataset, + eval_dataset=eval_dataset, + peft_config=peft_config, + args=training_args, + ) + + try: + # Train + logger.info("Starting training") + trainer.train() + logger.info("Training completed successfully") + + # Save final model if output directory is provided + if output_dir_path: + self.save_model(model_obj, trainer, peft_config, output_dir_path) + + finally: + # Clean up 
resources + logger.info("Cleaning up resources") + if hasattr(trainer, "model"): + evacuate_model_from_device(trainer.model, device.type) + del trainer + gc.collect() + logger.info("Cleanup completed") + + async def train( + self, + model: str, + output_dir: str | None, + job_uuid: str, + lora_config: LoraFinetuningConfig, + config: TrainingConfig, + provider_config: HuggingFacePostTrainingConfig, + ) -> tuple[dict[str, Any], list[Checkpoint] | None]: + """Train a model using HuggingFace's SFTTrainer""" + # Initialize and validate device + device = setup_torch_device(provider_config.device) + logger.info(f"Using device '{device}'") + + output_dir_path = None + if output_dir: + output_dir_path = Path(output_dir) + + # Track memory stats + memory_stats = { + "initial": get_memory_stats(device), + "after_training": None, + "final": None, + } + + # Configure LoRA + peft_config = None + if lora_config: + peft_config = LoraConfig( + lora_alpha=lora_config.alpha, + lora_dropout=0.1, + r=lora_config.rank, + bias="none", + task_type="CAUSAL_LM", + target_modules=lora_config.lora_attn_modules, + ) + + # Validate data config + if not config.data_config: + raise ValueError("DataConfig is required for training") + + # Train in a separate process + logger.info("Starting training in separate process") + try: + # Set multiprocessing start method to 'spawn' for CUDA/MPS compatibility + if device.type in ["cuda", "mps"]: + multiprocessing.set_start_method("spawn", force=True) + + process = multiprocessing.Process( + target=self._run_training_sync, + kwargs={ + "model": model, + "provider_config": provider_config.model_dump(), + "peft_config": peft_config, + "config": config.model_dump(), + "output_dir_path": output_dir_path, + }, + ) + process.start() + + # Monitor the process + while process.is_alive(): + process.join(timeout=1) # Check every second + if not process.is_alive(): + break + + # Get the return code + if process.exitcode != 0: + raise RuntimeError(f"Training failed with exit code {process.exitcode}") + + memory_stats["after_training"] = get_memory_stats(device) + + checkpoints = None + if output_dir_path: + # Create checkpoint + checkpoint = Checkpoint( + identifier=f"{model}-sft-{config.n_epochs}", + created_at=datetime.now(timezone.utc), + epoch=config.n_epochs, + post_training_job_id=job_uuid, + path=str(output_dir_path / "merged_model"), + ) + checkpoints = [checkpoint] + + return memory_stats, checkpoints + finally: + memory_stats["final"] = get_memory_stats(device) + gc.collect() diff --git a/llama_stack/providers/inline/post_training/torchtune/__init__.py b/llama_stack/providers/inline/post_training/torchtune/__init__.py index ca7801be7..7a2f9eba2 100644 --- a/llama_stack/providers/inline/post_training/torchtune/__init__.py +++ b/llama_stack/providers/inline/post_training/torchtune/__init__.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
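HFFinetuningSingleDevice.train runs the actual fine-tuning in a separate process started with the spawn method, which keeps CUDA/MPS state out of the parent and lets the server survive a crashed training run. A self-contained sketch of that spawn-and-monitor pattern; the worker below is a placeholder for _run_training_sync:

# Stripped-down sketch of the subprocess pattern used by HFFinetuningSingleDevice.train().
import multiprocessing


def _train_worker(model_name: str) -> None:
    # Placeholder for the real training entry point (_run_training_sync in the recipe).
    print(f"training {model_name} in an isolated process")


if __name__ == "__main__":
    # "spawn" avoids inheriting CUDA/MPS state from the parent process.
    multiprocessing.set_start_method("spawn", force=True)
    process = multiprocessing.Process(target=_train_worker, kwargs={"model_name": "my-model"})
    process.start()
    process.join()  # the recipe polls with join(timeout=1); a blocking join keeps the sketch short
    if process.exitcode != 0:
        raise RuntimeError(f"Training failed with exit code {process.exitcode}")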
-from typing import Any, Dict +from typing import Any from llama_stack.distribution.datatypes import Api @@ -15,7 +15,7 @@ from .config import TorchtunePostTrainingConfig async def get_provider_impl( config: TorchtunePostTrainingConfig, - deps: Dict[Api, Any], + deps: dict[Api, Any], ): from .post_training import TorchtunePostTrainingImpl diff --git a/llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py b/llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py index fcadd0884..af8bd2765 100644 --- a/llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +++ b/llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py @@ -8,7 +8,7 @@ import json import os import shutil from pathlib import Path -from typing import Any, Dict, List +from typing import Any import torch from safetensors.torch import save_file @@ -34,7 +34,7 @@ class TorchtuneCheckpointer: model_id: str, training_algorithm: str, checkpoint_dir: str, - checkpoint_files: List[str], + checkpoint_files: list[str], output_dir: str, model_type: str, ): @@ -54,11 +54,11 @@ class TorchtuneCheckpointer: # get ckpt paths self._checkpoint_path = Path.joinpath(self._checkpoint_dir, self._checkpoint_file) - def load_checkpoint(self) -> Dict[str, Any]: + def load_checkpoint(self) -> dict[str, Any]: """ Load Meta checkpoint from file. Currently only loading from a single file is supported. """ - state_dict: Dict[str, Any] = {} + state_dict: dict[str, Any] = {} model_state_dict = safe_torch_load(self._checkpoint_path) if self._model_type == ModelType.LLAMA3_VISION: from torchtune.models.llama3_2_vision._convert_weights import ( @@ -82,7 +82,7 @@ class TorchtuneCheckpointer: def save_checkpoint( self, - state_dict: Dict[str, Any], + state_dict: dict[str, Any], epoch: int, adapter_only: bool = False, checkpoint_format: str | None = None, @@ -100,7 +100,7 @@ class TorchtuneCheckpointer: def _save_meta_format_checkpoint( self, model_file_path: Path, - state_dict: Dict[str, Any], + state_dict: dict[str, Any], adapter_only: bool = False, ) -> None: model_file_path.mkdir(parents=True, exist_ok=True) @@ -168,7 +168,7 @@ class TorchtuneCheckpointer: def _save_hf_format_checkpoint( self, model_file_path: Path, - state_dict: Dict[str, Any], + state_dict: dict[str, Any], ) -> None: # the config.json file contains model params needed for state dict conversion config = json.loads(Path.joinpath(self._checkpoint_dir.parent, "config.json").read_text()) @@ -179,7 +179,7 @@ class TorchtuneCheckpointer: repo_id_path = Path.joinpath(self._checkpoint_dir.parent, REPO_ID_FNAME).with_suffix(".json") self.repo_id = None if repo_id_path.exists(): - with open(repo_id_path, "r") as json_file: + with open(repo_id_path) as json_file: data = json.load(json_file) self.repo_id = data.get("repo_id") diff --git a/llama_stack/providers/inline/post_training/torchtune/common/utils.py b/llama_stack/providers/inline/post_training/torchtune/common/utils.py index a040ca1b0..f0fa052a2 100644 --- a/llama_stack/providers/inline/post_training/torchtune/common/utils.py +++ b/llama_stack/providers/inline/post_training/torchtune/common/utils.py @@ -10,7 +10,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from typing import Callable, Dict +from collections.abc import Callable import torch from pydantic import BaseModel @@ -35,7 +35,7 @@ class ModelConfig(BaseModel): checkpoint_type: str -MODEL_CONFIGS: Dict[str, ModelConfig] = { +MODEL_CONFIGS: dict[str, ModelConfig] = { "Llama3.2-3B-Instruct": ModelConfig( model_definition=lora_llama3_2_3b, tokenizer_type=llama3_tokenizer, @@ -48,7 +48,7 @@ MODEL_CONFIGS: Dict[str, ModelConfig] = { ), } -DATA_FORMATS: Dict[str, Transform] = { +DATA_FORMATS: dict[str, Transform] = { "instruct": InputOutputToMessages, "dialog": ShareGPTToMessages, } diff --git a/llama_stack/providers/inline/post_training/torchtune/config.py b/llama_stack/providers/inline/post_training/torchtune/config.py index ee3504f9e..f3ce874aa 100644 --- a/llama_stack/providers/inline/post_training/torchtune/config.py +++ b/llama_stack/providers/inline/post_training/torchtune/config.py @@ -4,17 +4,17 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict, Literal, Optional +from typing import Any, Literal from pydantic import BaseModel class TorchtunePostTrainingConfig(BaseModel): - torch_seed: Optional[int] = None - checkpoint_format: Optional[Literal["meta", "huggingface"]] = "meta" + torch_seed: int | None = None + checkpoint_format: Literal["meta", "huggingface"] | None = "meta" @classmethod - def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> Dict[str, Any]: + def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: return { "checkpoint_format": "meta", } diff --git a/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py b/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py index 6b607f1c7..96dd8b8dd 100644 --- a/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +++ b/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py @@ -11,7 +11,8 @@ # LICENSE file in the root directory of this source tree. import json -from typing import Any, Mapping +from collections.abc import Mapping +from typing import Any from llama_stack.providers.utils.common.data_schema_validator import ColumnName diff --git a/llama_stack/providers/inline/post_training/torchtune/datasets/sft.py b/llama_stack/providers/inline/post_training/torchtune/datasets/sft.py index 050996860..ae7faf31e 100644 --- a/llama_stack/providers/inline/post_training/torchtune/datasets/sft.py +++ b/llama_stack/providers/inline/post_training/torchtune/datasets/sft.py @@ -10,7 +10,8 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
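The torchtune provider keys its behavior off the two small registries retyped above: MODEL_CONFIGS maps a model name to its torchtune definition and tokenizer, and DATA_FORMATS maps a dataset format to a message transform. A hedged sketch of a guarded lookup; the helper name is illustrative, only the two dicts come from the module:

# Illustrative lookup against the registries typed above.
from llama_stack.providers.inline.post_training.torchtune.common.utils import (
    DATA_FORMATS,
    MODEL_CONFIGS,
)


def resolve_training_setup(model_name: str, data_format: str):
    """Hypothetical helper: fail fast when the model or data format is unsupported."""
    if model_name not in MODEL_CONFIGS:
        raise ValueError(f"Unsupported model {model_name!r}; known: {sorted(MODEL_CONFIGS)}")
    if data_format not in DATA_FORMATS:
        raise ValueError(f"Unsupported data format {data_format!r}; known: {sorted(DATA_FORMATS)}")
    return MODEL_CONFIGS[model_name], DATA_FORMATS[data_format]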
-from typing import Any, Dict, List, Mapping +from collections.abc import Mapping +from typing import Any import numpy as np from torch.utils.data import Dataset @@ -27,7 +28,7 @@ from llama_stack.providers.inline.post_training.torchtune.datasets.format_adapte class SFTDataset(Dataset): def __init__( self, - rows: List[Dict[str, Any]], + rows: list[dict[str, Any]], message_transform: Transform, model_transform: Transform, dataset_type: str, @@ -40,11 +41,11 @@ class SFTDataset(Dataset): def __len__(self): return len(self._rows) - def __getitem__(self, index: int) -> Dict[str, Any]: + def __getitem__(self, index: int) -> dict[str, Any]: sample = self._rows[index] return self._prepare_sample(sample) - def _prepare_sample(self, sample: Mapping[str, Any]) -> Dict[str, Any]: + def _prepare_sample(self, sample: Mapping[str, Any]) -> dict[str, Any]: if self._dataset_type == "instruct": sample = llama_stack_instruct_to_torchtune_instruct(sample) elif self._dataset_type == "dialog": diff --git a/llama_stack/providers/inline/post_training/torchtune/post_training.py b/llama_stack/providers/inline/post_training/torchtune/post_training.py index 2c129ef41..c7d8d6758 100644 --- a/llama_stack/providers/inline/post_training/torchtune/post_training.py +++ b/llama_stack/providers/inline/post_training/torchtune/post_training.py @@ -3,13 +3,14 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from datetime import datetime, timezone -from typing import Any, Dict, Optional +from enum import Enum +from typing import Any from llama_stack.apis.datasetio import DatasetIO from llama_stack.apis.datasets import Datasets from llama_stack.apis.post_training import ( AlgorithmConfig, + Checkpoint, DPOAlignmentConfig, JobStatus, ListPostTrainingJobsResponse, @@ -25,9 +26,19 @@ from llama_stack.providers.inline.post_training.torchtune.config import ( from llama_stack.providers.inline.post_training.torchtune.recipes.lora_finetuning_single_device import ( LoraFinetuningSingleDevice, ) +from llama_stack.providers.utils.scheduler import JobArtifact, Scheduler +from llama_stack.providers.utils.scheduler import JobStatus as SchedulerJobStatus from llama_stack.schema_utils import webmethod +class TrainingArtifactType(Enum): + CHECKPOINT = "checkpoint" + RESOURCES_STATS = "resources_stats" + + +_JOB_TYPE_SUPERVISED_FINE_TUNE = "supervised-fine-tune" + + class TorchtunePostTrainingImpl: def __init__( self, @@ -38,38 +49,43 @@ class TorchtunePostTrainingImpl: self.config = config self.datasetio_api = datasetio_api self.datasets_api = datasets + self._scheduler = Scheduler() - # TODO: assume sync job, will need jobs API for async scheduling - self.jobs = {} - self.checkpoints_dict = {} + async def shutdown(self) -> None: + await self._scheduler.shutdown() - async def shutdown(self): - pass + @staticmethod + def _checkpoint_to_artifact(checkpoint: Checkpoint) -> JobArtifact: + return JobArtifact( + type=TrainingArtifactType.CHECKPOINT.value, + name=checkpoint.identifier, + uri=checkpoint.path, + metadata=dict(checkpoint), + ) + + @staticmethod + def _resources_stats_to_artifact(resources_stats: dict[str, Any]) -> JobArtifact: + return JobArtifact( + type=TrainingArtifactType.RESOURCES_STATS.value, + name=TrainingArtifactType.RESOURCES_STATS.value, + metadata=resources_stats, + ) async def supervised_fine_tune( self, job_uuid: str, training_config: TrainingConfig, - hyperparam_search_config: Dict[str, Any], - logger_config: Dict[str, Any], + 
hyperparam_search_config: dict[str, Any], + logger_config: dict[str, Any], model: str, - checkpoint_dir: Optional[str], - algorithm_config: Optional[AlgorithmConfig], + checkpoint_dir: str | None, + algorithm_config: AlgorithmConfig | None, ) -> PostTrainingJob: - if job_uuid in self.jobs: - raise ValueError(f"Job {job_uuid} already exists") - - post_training_job = PostTrainingJob(job_uuid=job_uuid) - - job_status_response = PostTrainingJobStatusResponse( - job_uuid=job_uuid, - status=JobStatus.scheduled, - scheduled_at=datetime.now(timezone.utc), - ) - self.jobs[job_uuid] = job_status_response - if isinstance(algorithm_config, LoraFinetuningConfig): - try: + + async def handler(on_log_message_cb, on_status_change_cb, on_artifact_collected_cb): + on_log_message_cb("Starting Lora finetuning") + recipe = LoraFinetuningSingleDevice( self.config, job_uuid, @@ -82,26 +98,22 @@ class TorchtunePostTrainingImpl: self.datasetio_api, self.datasets_api, ) - - job_status_response.status = JobStatus.in_progress - job_status_response.started_at = datetime.now(timezone.utc) - await recipe.setup() + resources_allocated, checkpoints = await recipe.train() - self.checkpoints_dict[job_uuid] = checkpoints - job_status_response.resources_allocated = resources_allocated - job_status_response.checkpoints = checkpoints - job_status_response.status = JobStatus.completed - job_status_response.completed_at = datetime.now(timezone.utc) + on_artifact_collected_cb(self._resources_stats_to_artifact(resources_allocated)) + for checkpoint in checkpoints: + artifact = self._checkpoint_to_artifact(checkpoint) + on_artifact_collected_cb(artifact) - except Exception: - job_status_response.status = JobStatus.failed - raise + on_status_change_cb(SchedulerJobStatus.completed) + on_log_message_cb("Lora finetuning completed") else: raise NotImplementedError() - return post_training_job + job_uuid = self._scheduler.schedule(_JOB_TYPE_SUPERVISED_FINE_TUNE, job_uuid, handler) + return PostTrainingJob(job_uuid=job_uuid) async def preference_optimize( self, @@ -109,24 +121,60 @@ class TorchtunePostTrainingImpl: finetuned_model: str, algorithm_config: DPOAlignmentConfig, training_config: TrainingConfig, - hyperparam_search_config: Dict[str, Any], - logger_config: Dict[str, Any], + hyperparam_search_config: dict[str, Any], + logger_config: dict[str, Any], ) -> PostTrainingJob: ... 
async def get_training_jobs(self) -> ListPostTrainingJobsResponse: - return ListPostTrainingJobsResponse(data=[PostTrainingJob(job_uuid=uuid_) for uuid_ in self.jobs]) + return ListPostTrainingJobsResponse( + data=[PostTrainingJob(job_uuid=job.id) for job in self._scheduler.get_jobs()] + ) + + @staticmethod + def _get_artifacts_metadata_by_type(job, artifact_type): + return [artifact.metadata for artifact in job.artifacts if artifact.type == artifact_type] + + @classmethod + def _get_checkpoints(cls, job): + return cls._get_artifacts_metadata_by_type(job, TrainingArtifactType.CHECKPOINT.value) + + @classmethod + def _get_resources_allocated(cls, job): + data = cls._get_artifacts_metadata_by_type(job, TrainingArtifactType.RESOURCES_STATS.value) + return data[0] if data else None @webmethod(route="/post-training/job/status") - async def get_training_job_status(self, job_uuid: str) -> Optional[PostTrainingJobStatusResponse]: - return self.jobs.get(job_uuid, None) + async def get_training_job_status(self, job_uuid: str) -> PostTrainingJobStatusResponse | None: + job = self._scheduler.get_job(job_uuid) + + match job.status: + # TODO: Add support for other statuses to API + case SchedulerJobStatus.new | SchedulerJobStatus.scheduled: + status = JobStatus.scheduled + case SchedulerJobStatus.running: + status = JobStatus.in_progress + case SchedulerJobStatus.completed: + status = JobStatus.completed + case SchedulerJobStatus.failed: + status = JobStatus.failed + case _: + raise NotImplementedError() + + return PostTrainingJobStatusResponse( + job_uuid=job_uuid, + status=status, + scheduled_at=job.scheduled_at, + started_at=job.started_at, + completed_at=job.completed_at, + checkpoints=self._get_checkpoints(job), + resources_allocated=self._get_resources_allocated(job), + ) @webmethod(route="/post-training/job/cancel") async def cancel_training_job(self, job_uuid: str) -> None: - raise NotImplementedError("Job cancel is not implemented yet") + self._scheduler.cancel(job_uuid) @webmethod(route="/post-training/job/artifacts") - async def get_training_job_artifacts(self, job_uuid: str) -> Optional[PostTrainingJobArtifactsResponse]: - if job_uuid in self.checkpoints_dict: - checkpoints = self.checkpoints_dict.get(job_uuid, []) - return PostTrainingJobArtifactsResponse(job_uuid=job_uuid, checkpoints=checkpoints) - return None + async def get_training_job_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse | None: + job = self._scheduler.get_job(job_uuid) + return PostTrainingJobArtifactsResponse(job_uuid=job_uuid, checkpoints=self._get_checkpoints(job)) diff --git a/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py b/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py index 04bf86b97..f56dd2499 100644 --- a/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +++ b/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py @@ -4,14 +4,13 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
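Both post-training providers now translate scheduler states into the public JobStatus values with the same match block inside get_training_job_status. Restated as a standalone helper for clarity; the function itself is illustrative and not part of the patch:

# Illustrative restatement of the status mapping used in get_training_job_status() above.
from llama_stack.apis.post_training import JobStatus
from llama_stack.providers.utils.scheduler import JobStatus as SchedulerJobStatus


def to_api_status(status: SchedulerJobStatus) -> JobStatus:
    match status:
        case SchedulerJobStatus.new | SchedulerJobStatus.scheduled:
            return JobStatus.scheduled
        case SchedulerJobStatus.running:
            return JobStatus.in_progress
        case SchedulerJobStatus.completed:
            return JobStatus.completed
        case SchedulerJobStatus.failed:
            return JobStatus.failed
        case _:
            raise NotImplementedError(f"Unhandled scheduler status: {status}")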
-import gc import logging import os import time from datetime import datetime, timezone from functools import partial from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple +from typing import Any import torch from torch import nn @@ -39,7 +38,6 @@ from llama_stack.apis.datasets import Datasets from llama_stack.apis.post_training import ( Checkpoint, DataConfig, - EfficiencyConfig, LoraFinetuningConfig, OptimizerConfig, QATFinetuningConfig, @@ -48,9 +46,7 @@ from llama_stack.apis.post_training import ( from llama_stack.distribution.utils.config_dirs import DEFAULT_CHECKPOINT_DIR from llama_stack.distribution.utils.model_utils import model_local_dir from llama_stack.models.llama.sku_list import resolve_model -from llama_stack.providers.inline.post_training.common.validator import ( - validate_input_dataset_schema, -) +from llama_stack.providers.inline.post_training.common.utils import evacuate_model_from_device from llama_stack.providers.inline.post_training.torchtune.common import utils from llama_stack.providers.inline.post_training.torchtune.common.checkpointer import ( TorchtuneCheckpointer, @@ -83,18 +79,16 @@ class LoraFinetuningSingleDevice: config: TorchtunePostTrainingConfig, job_uuid: str, training_config: TrainingConfig, - hyperparam_search_config: Dict[str, Any], - logger_config: Dict[str, Any], + hyperparam_search_config: dict[str, Any], + logger_config: dict[str, Any], model: str, - checkpoint_dir: Optional[str], + checkpoint_dir: str | None, algorithm_config: LoraFinetuningConfig | QATFinetuningConfig | None, datasetio_api: DatasetIO, datasets_api: Datasets, ) -> None: assert isinstance(training_config.data_config, DataConfig), "DataConfig must be initialized" - assert isinstance(training_config.efficiency_config, EfficiencyConfig), "EfficiencyConfig must be initialized" - self.job_uuid = job_uuid self.training_config = training_config if not isinstance(algorithm_config, LoraFinetuningConfig): @@ -159,7 +153,7 @@ class LoraFinetuningSingleDevice: self.datasets_api = datasets_api async def load_checkpoint(self): - def get_checkpoint_files(checkpoint_dir: str) -> List[str]: + def get_checkpoint_files(checkpoint_dir: str) -> list[str]: try: # List all files in the given directory files = os.listdir(checkpoint_dir) @@ -253,8 +247,8 @@ class LoraFinetuningSingleDevice: self, enable_activation_checkpointing: bool, enable_activation_offloading: bool, - base_model_state_dict: Dict[str, Any], - lora_weights_state_dict: Optional[Dict[str, Any]] = None, + base_model_state_dict: dict[str, Any], + lora_weights_state_dict: dict[str, Any] | None = None, ) -> nn.Module: self._lora_rank = self.algorithm_config.rank self._lora_alpha = self.algorithm_config.alpha @@ -338,7 +332,7 @@ class LoraFinetuningSingleDevice: tokenizer: Llama3Tokenizer, shuffle: bool, batch_size: int, - ) -> Tuple[DistributedSampler, DataLoader]: + ) -> tuple[DistributedSampler, DataLoader]: async def fetch_rows(dataset_id: str): return await self.datasetio_api.iterrows( dataset_id=dataset_id, @@ -348,11 +342,9 @@ class LoraFinetuningSingleDevice: all_rows = await fetch_rows(dataset_id) rows = all_rows.data - await validate_input_dataset_schema( - datasets_api=self.datasets_api, - dataset_id=dataset_id, - dataset_type=self._data_format.value, - ) + # TODO (xiyan): validate dataset schema + # dataset_def = await self.datasets_api.get_dataset(dataset_id=dataset_id) + data_transform = await utils.get_data_transform(self._data_format) ds = SFTDataset( rows, @@ -435,7 +427,7 @@ class 
LoraFinetuningSingleDevice: checkpoint_format=self._checkpoint_format, ) - async def _loss_step(self, batch: Dict[str, torch.Tensor]) -> torch.Tensor: + async def _loss_step(self, batch: dict[str, torch.Tensor]) -> torch.Tensor: # Shape [b, s], needed for the loss not the model labels = batch.pop("labels") # run model @@ -457,7 +449,7 @@ class LoraFinetuningSingleDevice: return loss - async def train(self) -> Tuple[Dict[str, Any], List[Checkpoint]]: + async def train(self) -> tuple[dict[str, Any], list[Checkpoint]]: """ The core training loop. """ @@ -469,7 +461,7 @@ class LoraFinetuningSingleDevice: # training artifacts checkpoints = [] - memory_stats: Dict[str, Any] = {} + memory_stats: dict[str, Any] = {} # self.epochs_run should be non-zero when we're resuming from a checkpoint for curr_epoch in range(self.epochs_run, self.total_epochs): @@ -562,15 +554,11 @@ class LoraFinetuningSingleDevice: checkpoints.append(checkpoint) # clean up the memory after training finishes - if self._device.type != "cpu": - self._model.to("cpu") - torch.cuda.empty_cache() - del self._model - gc.collect() + evacuate_model_from_device(self._model, self._device.type) return (memory_stats, checkpoints) - async def validation(self) -> Tuple[float, float]: + async def validation(self) -> tuple[float, float]: total_loss = 0.0 total_tokens = 0 log.info("Starting validation...") diff --git a/llama_stack/providers/inline/safety/code_scanner/__init__.py b/llama_stack/providers/inline/safety/code_scanner/__init__.py index 62975a963..68e32b747 100644 --- a/llama_stack/providers/inline/safety/code_scanner/__init__.py +++ b/llama_stack/providers/inline/safety/code_scanner/__init__.py @@ -4,12 +4,12 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict +from typing import Any from .config import CodeScannerConfig -async def get_provider_impl(config: CodeScannerConfig, deps: Dict[str, Any]): +async def get_provider_impl(config: CodeScannerConfig, deps: dict[str, Any]): from .code_scanner import MetaReferenceCodeScannerSafetyImpl impl = MetaReferenceCodeScannerSafetyImpl(config, deps) diff --git a/llama_stack/providers/inline/safety/code_scanner/code_scanner.py b/llama_stack/providers/inline/safety/code_scanner/code_scanner.py index 606d11d2c..be05ee436 100644 --- a/llama_stack/providers/inline/safety/code_scanner/code_scanner.py +++ b/llama_stack/providers/inline/safety/code_scanner/code_scanner.py @@ -5,7 +5,7 @@ # the root directory of this source tree. import logging -from typing import Any, Dict, List +from typing import Any from llama_stack.apis.inference import Message from llama_stack.apis.safety import ( @@ -48,8 +48,8 @@ class MetaReferenceCodeScannerSafetyImpl(Safety): async def run_shield( self, shield_id: str, - messages: List[Message], - params: Dict[str, Any] = None, + messages: list[Message], + params: dict[str, Any] = None, ) -> RunShieldResponse: shield = await self.shield_store.get_shield(shield_id) if not shield: diff --git a/llama_stack/providers/inline/safety/code_scanner/config.py b/llama_stack/providers/inline/safety/code_scanner/config.py index 1d880ee9c..66eb8e368 100644 --- a/llama_stack/providers/inline/safety/code_scanner/config.py +++ b/llama_stack/providers/inline/safety/code_scanner/config.py @@ -4,12 +4,12 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
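# The inline CUDA cleanup in train() is replaced above by
# evacuate_model_from_device() from post_training/common/utils. That helper's
# body is not part of this hunk, so the sketch below is only a plausible
# reconstruction of what it needs to do to free accelerator memory:
import gc

import torch


def evacuate_model_from_device(model: torch.nn.Module, device_type: str) -> None:
    """Move the model off the accelerator and release cached memory."""
    if device_type != "cpu":
        model.to("cpu")
    # Deleting this local name is not enough on its own; the caller must also
    # drop its own reference (e.g. self._model) so gc can reclaim the weights.
    del model
    gc.collect()
    if device_type == "cuda":
        torch.cuda.empty_cache()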
-from typing import Any, Dict +from typing import Any from pydantic import BaseModel class CodeScannerConfig(BaseModel): @classmethod - def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> Dict[str, Any]: + def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: return {} diff --git a/llama_stack/providers/inline/safety/llama_guard/__init__.py b/llama_stack/providers/inline/safety/llama_guard/__init__.py index a4263b169..8865cc344 100644 --- a/llama_stack/providers/inline/safety/llama_guard/__init__.py +++ b/llama_stack/providers/inline/safety/llama_guard/__init__.py @@ -4,12 +4,12 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict +from typing import Any from .config import LlamaGuardConfig -async def get_provider_impl(config: LlamaGuardConfig, deps: Dict[str, Any]): +async def get_provider_impl(config: LlamaGuardConfig, deps: dict[str, Any]): from .llama_guard import LlamaGuardSafetyImpl assert isinstance(config, LlamaGuardConfig), f"Unexpected config type: {type(config)}" diff --git a/llama_stack/providers/inline/safety/llama_guard/config.py b/llama_stack/providers/inline/safety/llama_guard/config.py index 53849ab33..412e7218d 100644 --- a/llama_stack/providers/inline/safety/llama_guard/config.py +++ b/llama_stack/providers/inline/safety/llama_guard/config.py @@ -4,16 +4,16 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict, List +from typing import Any from pydantic import BaseModel class LlamaGuardConfig(BaseModel): - excluded_categories: List[str] = [] + excluded_categories: list[str] = [] @classmethod - def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> Dict[str, Any]: + def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: return { "excluded_categories": [], } diff --git a/llama_stack/providers/inline/safety/llama_guard/llama_guard.py b/llama_stack/providers/inline/safety/llama_guard/llama_guard.py index 2ab16f986..937301c2e 100644 --- a/llama_stack/providers/inline/safety/llama_guard/llama_guard.py +++ b/llama_stack/providers/inline/safety/llama_guard/llama_guard.py @@ -6,7 +6,7 @@ import re from string import Template -from typing import Any, Dict, List, Optional +from typing import Any from llama_stack.apis.common.content_types import ImageContentItem, TextContentItem from llama_stack.apis.inference import ( @@ -149,8 +149,8 @@ class LlamaGuardSafetyImpl(Safety, ShieldsProtocolPrivate): async def run_shield( self, shield_id: str, - messages: List[Message], - params: Dict[str, Any] = None, + messages: list[Message], + params: dict[str, Any] = None, ) -> RunShieldResponse: shield = await self.shield_store.get_shield(shield_id) if not shield: @@ -177,7 +177,7 @@ class LlamaGuardShield: self, model: str, inference_api: Inference, - excluded_categories: Optional[List[str]] = None, + excluded_categories: list[str] | None = None, ): if excluded_categories is None: excluded_categories = [] @@ -193,7 +193,7 @@ class LlamaGuardShield: self.inference_api = inference_api self.excluded_categories = excluded_categories - def check_unsafe_response(self, response: str) -> Optional[str]: + def check_unsafe_response(self, response: str) -> str | None: match = re.match(r"^unsafe\n(.*)$", response) if match: # extracts the unsafe code @@ -202,7 +202,7 @@ class LlamaGuardShield: return None - def 
get_safety_categories(self) -> List[str]: + def get_safety_categories(self) -> list[str]: excluded_categories = self.excluded_categories if set(excluded_categories) == set(SAFETY_CATEGORIES_TO_CODE_MAP.values()): excluded_categories = [] @@ -218,7 +218,7 @@ class LlamaGuardShield: return final_categories - def validate_messages(self, messages: List[Message]) -> None: + def validate_messages(self, messages: list[Message]) -> None: if len(messages) == 0: raise ValueError("Messages must not be empty") if messages[0].role != Role.user.value: @@ -229,7 +229,7 @@ class LlamaGuardShield: return messages - async def run(self, messages: List[Message]) -> RunShieldResponse: + async def run(self, messages: list[Message]) -> RunShieldResponse: messages = self.validate_messages(messages) if self.model == CoreModelId.llama_guard_3_11b_vision.value: @@ -247,10 +247,10 @@ class LlamaGuardShield: content = content.strip() return self.get_shield_response(content) - def build_text_shield_input(self, messages: List[Message]) -> UserMessage: + def build_text_shield_input(self, messages: list[Message]) -> UserMessage: return UserMessage(content=self.build_prompt(messages)) - def build_vision_shield_input(self, messages: List[Message]) -> UserMessage: + def build_vision_shield_input(self, messages: list[Message]) -> UserMessage: conversation = [] most_recent_img = None @@ -284,7 +284,7 @@ class LlamaGuardShield: return UserMessage(content=prompt) - def build_prompt(self, messages: List[Message]) -> str: + def build_prompt(self, messages: list[Message]) -> str: categories = self.get_safety_categories() categories_str = "\n".join(categories) conversations_str = "\n\n".join( diff --git a/llama_stack/providers/inline/safety/prompt_guard/__init__.py b/llama_stack/providers/inline/safety/prompt_guard/__init__.py index 747f34421..1761c9138 100644 --- a/llama_stack/providers/inline/safety/prompt_guard/__init__.py +++ b/llama_stack/providers/inline/safety/prompt_guard/__init__.py @@ -4,12 +4,12 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict +from typing import Any -from .config import PromptGuardConfig # noqa: F401 +from .config import PromptGuardConfig -async def get_provider_impl(config: PromptGuardConfig, deps: Dict[str, Any]): +async def get_provider_impl(config: PromptGuardConfig, deps: dict[str, Any]): from .prompt_guard import PromptGuardSafetyImpl impl = PromptGuardSafetyImpl(config, deps) diff --git a/llama_stack/providers/inline/safety/prompt_guard/config.py b/llama_stack/providers/inline/safety/prompt_guard/config.py index 76bd5978d..69ea512c5 100644 --- a/llama_stack/providers/inline/safety/prompt_guard/config.py +++ b/llama_stack/providers/inline/safety/prompt_guard/config.py @@ -5,7 +5,7 @@ # the root directory of this source tree. 
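# Several provider configs touched in this patch expose a sample_run_config()
# classmethod that returns the default, YAML-serializable settings used when a
# distribution template is generated. A stand-alone example of the same
# pattern (class and field names here are illustrative, not from the source):
from typing import Any

from pydantic import BaseModel


class ExampleShieldConfig(BaseModel):
    guard_type: str = "injection"

    @classmethod
    def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
        return {"guard_type": "injection"}


print(ExampleShieldConfig.sample_run_config("/path/to/distro"))  # {'guard_type': 'injection'}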
from enum import Enum -from typing import Any, Dict +from typing import Any from pydantic import BaseModel, field_validator @@ -26,7 +26,7 @@ class PromptGuardConfig(BaseModel): return v @classmethod - def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> Dict[str, Any]: + def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: return { "guard_type": "injection", } diff --git a/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py b/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py index fce3e3d14..ff87889ea 100644 --- a/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +++ b/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py @@ -5,7 +5,7 @@ # the root directory of this source tree. import logging -from typing import Any, Dict, List +from typing import Any import torch from transformers import AutoModelForSequenceClassification, AutoTokenizer @@ -49,8 +49,8 @@ class PromptGuardSafetyImpl(Safety, ShieldsProtocolPrivate): async def run_shield( self, shield_id: str, - messages: List[Message], - params: Dict[str, Any] = None, + messages: list[Message], + params: dict[str, Any] = None, ) -> RunShieldResponse: shield = await self.shield_store.get_shield(shield_id) if not shield: @@ -75,13 +75,15 @@ class PromptGuardShield: self.temperature = temperature self.threshold = threshold - self.device = "cuda" + self.device = "cpu" + if torch.cuda.is_available(): + self.device = "cuda" # load model and tokenizer self.tokenizer = AutoTokenizer.from_pretrained(model_dir) self.model = AutoModelForSequenceClassification.from_pretrained(model_dir, device_map=self.device) - async def run(self, messages: List[Message]) -> RunShieldResponse: + async def run(self, messages: list[Message]) -> RunShieldResponse: message = messages[-1] text = interleaved_content_as_str(message.content) diff --git a/llama_stack/providers/inline/scoring/basic/__init__.py b/llama_stack/providers/inline/scoring/basic/__init__.py index 4898b973a..d9d150b1a 100644 --- a/llama_stack/providers/inline/scoring/basic/__init__.py +++ b/llama_stack/providers/inline/scoring/basic/__init__.py @@ -3,7 +3,7 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict +from typing import Any from llama_stack.distribution.datatypes import Api @@ -12,7 +12,7 @@ from .config import BasicScoringConfig async def get_provider_impl( config: BasicScoringConfig, - deps: Dict[Api, Any], + deps: dict[Api, Any], ): from .scoring import BasicScoringImpl diff --git a/llama_stack/providers/inline/scoring/basic/config.py b/llama_stack/providers/inline/scoring/basic/config.py index 5866be359..e9c7fb451 100644 --- a/llama_stack/providers/inline/scoring/basic/config.py +++ b/llama_stack/providers/inline/scoring/basic/config.py @@ -3,12 +3,12 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
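# PromptGuardShield above now probes for CUDA instead of hard-coding "cuda",
# so the shield also loads on CPU-only hosts. The same guard as a compact
# helper (illustrative only); the returned string can be passed straight to
# Hugging Face's device_map, as the shield does:
import torch


def pick_device() -> str:
    return "cuda" if torch.cuda.is_available() else "cpu"


print(pick_device())  # "cuda" on a GPU host, otherwise "cpu"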
-from typing import Any, Dict +from typing import Any from pydantic import BaseModel class BasicScoringConfig(BaseModel): @classmethod - def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> Dict[str, Any]: + def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: return {} diff --git a/llama_stack/providers/inline/scoring/basic/scoring.py b/llama_stack/providers/inline/scoring/basic/scoring.py index 9a45f7139..09f89be5e 100644 --- a/llama_stack/providers/inline/scoring/basic/scoring.py +++ b/llama_stack/providers/inline/scoring/basic/scoring.py @@ -3,7 +3,7 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict, List, Optional +from typing import Any from llama_stack.apis.datasetio import DatasetIO from llama_stack.apis.datasets import Datasets @@ -66,7 +66,7 @@ class BasicScoringImpl( async def shutdown(self) -> None: ... - async def list_scoring_functions(self) -> List[ScoringFn]: + async def list_scoring_functions(self) -> list[ScoringFn]: scoring_fn_defs_list = [ fn_def for impl in self.scoring_fn_id_impls.values() for fn_def in impl.get_supported_scoring_fn_defs() ] @@ -82,7 +82,7 @@ class BasicScoringImpl( async def score_batch( self, dataset_id: str, - scoring_functions: Dict[str, Optional[ScoringFnParams]] = None, + scoring_functions: dict[str, ScoringFnParams | None] = None, save_results_dataset: bool = False, ) -> ScoreBatchResponse: dataset_def = await self.datasets_api.get_dataset(dataset_id=dataset_id) @@ -107,8 +107,8 @@ class BasicScoringImpl( async def score( self, - input_rows: List[Dict[str, Any]], - scoring_functions: Dict[str, Optional[ScoringFnParams]] = None, + input_rows: list[dict[str, Any]], + scoring_functions: dict[str, ScoringFnParams | None] = None, ) -> ScoreResponse: res = {} for scoring_fn_id in scoring_functions.keys(): diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/bfcl_scoring_fn.py b/llama_stack/providers/inline/scoring/basic/scoring_fn/bfcl_scoring_fn.py index f37780f3e..b29620be2 100644 --- a/llama_stack/providers/inline/scoring/basic/scoring_fn/bfcl_scoring_fn.py +++ b/llama_stack/providers/inline/scoring/basic/scoring_fn/bfcl_scoring_fn.py @@ -6,7 +6,7 @@ import json import re -from typing import Any, Dict, Optional +from typing import Any from llama_stack.apis.scoring import ScoringResultRow from llama_stack.apis.scoring_functions import ScoringFnParams @@ -17,7 +17,7 @@ from ..utils.bfcl.checker import ast_checker, is_empty_output from .fn_defs.bfcl import bfcl -def postprocess(x: Dict[str, Any], test_category: str) -> Dict[str, Any]: +def postprocess(x: dict[str, Any], test_category: str) -> dict[str, Any]: contain_func_call = False error = None error_type = None @@ -52,11 +52,11 @@ def postprocess(x: Dict[str, Any], test_category: str) -> Dict[str, Any]: } -def gen_valid(x: Dict[str, Any]) -> Dict[str, float]: +def gen_valid(x: dict[str, Any]) -> dict[str, float]: return {"valid": x["valid"]} -def gen_relevance_acc(x: Dict[str, Any]) -> Dict[str, float]: +def gen_relevance_acc(x: dict[str, Any]) -> dict[str, float]: # This function serves for both relevance and irrelevance tests, which share the exact opposite logic. # If `test_category` is "irrelevance", the model is expected to output no function call. # No function call means either the AST decoding fails (a error message is generated) or the decoded AST does not contain any function call (such as a empty list, `[]`). 
@@ -78,9 +78,9 @@ class BFCLScoringFn(RegisteredBaseScoringFn): async def score_row( self, - input_row: Dict[str, Any], - scoring_fn_identifier: Optional[str] = "bfcl", - scoring_params: Optional[ScoringFnParams] = None, + input_row: dict[str, Any], + scoring_fn_identifier: str | None = "bfcl", + scoring_params: ScoringFnParams | None = None, ) -> ScoringResultRow: test_category = re.sub(r"_[0-9_-]+$", "", input_row["id"]) score_result = postprocess(input_row, test_category) diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py b/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py index 84ca55732..b87974d08 100644 --- a/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +++ b/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py @@ -6,7 +6,7 @@ import json import re -from typing import Any, Dict, Optional +from typing import Any from llama_stack.apis.scoring import ScoringResultRow from llama_stack.apis.scoring_functions import ScoringFnParams @@ -228,9 +228,9 @@ class DocVQAScoringFn(RegisteredBaseScoringFn): async def score_row( self, - input_row: Dict[str, Any], - scoring_fn_identifier: Optional[str] = "docvqa", - scoring_params: Optional[ScoringFnParams] = None, + input_row: dict[str, Any], + scoring_fn_identifier: str | None = "docvqa", + scoring_params: ScoringFnParams | None = None, ) -> ScoringResultRow: expected_answers = json.loads(input_row["expected_answer"]) generated_answer = input_row["generated_answer"] diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py b/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py index 0bd6bdd48..60804330f 100644 --- a/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +++ b/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict, Optional +from typing import Any from llama_stack.apis.scoring import ScoringResultRow from llama_stack.apis.scoring_functions import ScoringFnParams @@ -26,9 +26,9 @@ class EqualityScoringFn(RegisteredBaseScoringFn): async def score_row( self, - input_row: Dict[str, Any], - scoring_fn_identifier: Optional[str] = "equality", - scoring_params: Optional[ScoringFnParams] = None, + input_row: dict[str, Any], + scoring_fn_identifier: str | None = "equality", + scoring_params: ScoringFnParams | None = None, ) -> ScoringResultRow: assert "expected_answer" in input_row, "Expected answer not found in input row." assert "generated_answer" in input_row, "Generated answer not found in input row." diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py b/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py index 6ff856684..77f6176e6 100644 --- a/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +++ b/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from typing import Any, Dict, Optional +from typing import Any from llama_stack.apis.scoring import ScoringResultRow from llama_stack.apis.scoring_functions import ScoringFnParams @@ -28,9 +28,9 @@ class IfEvalScoringFn(RegisteredBaseScoringFn): async def score_row( self, - input_row: Dict[str, Any], - scoring_fn_identifier: Optional[str] = None, - scoring_params: Optional[ScoringFnParams] = None, + input_row: dict[str, Any], + scoring_fn_identifier: str | None = None, + scoring_params: ScoringFnParams | None = None, ) -> ScoringResultRow: from ..utils.ifeval_utils import INSTRUCTION_DICT, INSTRUCTION_LIST diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py b/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py index d6c78a9ac..d765959a8 100644 --- a/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +++ b/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py @@ -3,7 +3,7 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict, Optional +from typing import Any from llama_stack.apis.scoring import ScoringResultRow from llama_stack.apis.scoring_functions import ScoringFnParams, ScoringFnParamsType @@ -28,9 +28,9 @@ class RegexParserMathResponseScoringFn(RegisteredBaseScoringFn): async def score_row( self, - input_row: Dict[str, Any], - scoring_fn_identifier: Optional[str] = None, - scoring_params: Optional[ScoringFnParams] = None, + input_row: dict[str, Any], + scoring_fn_identifier: str | None = None, + scoring_params: ScoringFnParams | None = None, ) -> ScoringResultRow: assert scoring_fn_identifier is not None, "Scoring function identifier not found." fn_def = self.supported_fn_defs_registry[scoring_fn_identifier] diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py b/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py index 0606a9581..cb336e303 100644 --- a/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +++ b/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. import re -from typing import Any, Dict, Optional +from typing import Any from llama_stack.apis.scoring import ScoringResultRow from llama_stack.apis.scoring_functions import ScoringFnParams, ScoringFnParamsType @@ -28,9 +28,9 @@ class RegexParserScoringFn(RegisteredBaseScoringFn): async def score_row( self, - input_row: Dict[str, Any], - scoring_fn_identifier: Optional[str] = None, - scoring_params: Optional[ScoringFnParams] = None, + input_row: dict[str, Any], + scoring_fn_identifier: str | None = None, + scoring_params: ScoringFnParams | None = None, ) -> ScoringResultRow: assert scoring_fn_identifier is not None, "Scoring function identifier not found." 
fn_def = self.supported_fn_defs_registry[scoring_fn_identifier] diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py b/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py index 71defc433..d6e10e6c9 100644 --- a/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +++ b/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict, Optional +from typing import Any from llama_stack.apis.scoring import ScoringResultRow from llama_stack.apis.scoring_functions import ScoringFnParams @@ -26,9 +26,9 @@ class SubsetOfScoringFn(RegisteredBaseScoringFn): async def score_row( self, - input_row: Dict[str, Any], - scoring_fn_identifier: Optional[str] = "subset_of", - scoring_params: Optional[ScoringFnParams] = None, + input_row: dict[str, Any], + scoring_fn_identifier: str | None = "subset_of", + scoring_params: ScoringFnParams | None = None, ) -> ScoringResultRow: expected_answer = input_row["expected_answer"] generated_answer = input_row["generated_answer"] diff --git a/llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py b/llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py index 28605159f..b74c3826e 100644 --- a/llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py +++ b/llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py @@ -11,8 +11,8 @@ import logging import random import re import string +from collections.abc import Iterable, Sequence from types import MappingProxyType -from typing import Dict, Iterable, List, Optional, Sequence, Union import emoji import langdetect @@ -1673,12 +1673,11 @@ def split_chinese_japanese_hindi(lines: str) -> Iterable[str]: The separator for hindi is '।' """ for line in lines.splitlines(): - for sent in re.findall( + yield from re.findall( r"[^!?。\.\!\?\!\?\.\n।]+[!?。\.\!\?\!\?\.\n।]?", line.strip(), flags=re.U, - ): - yield sent + ) def count_words_cjk(text: str) -> int: @@ -1707,7 +1706,7 @@ def count_words_cjk(text: str) -> int: return non_asian_words_cnt + asian_chars_cnt + emoji_cnt -@functools.lru_cache(maxsize=None) +@functools.cache def _get_sentence_tokenizer(): return nltk.data.load("nltk:tokenizers/punkt/english.pickle") @@ -1719,8 +1718,8 @@ def count_sentences(text): return len(tokenized_sentences) -def get_langid(text: str, lid_path: Optional[str] = None) -> str: - line_langs: List[str] = [] +def get_langid(text: str, lid_path: str | None = None) -> str: + line_langs: list[str] = [] lines = [line.strip() for line in text.split("\n") if len(line.strip()) >= 4] for line in lines: @@ -1741,7 +1740,7 @@ def generate_keywords(num_keywords): """Library of instructions""" -_InstructionArgsDtype = Optional[Dict[str, Union[int, str, Sequence[str]]]] +_InstructionArgsDtype = dict[str, int | str | Sequence[str]] | None _LANGUAGES = LANGUAGE_CODES diff --git a/llama_stack/providers/inline/scoring/basic/utils/math_utils.py b/llama_stack/providers/inline/scoring/basic/utils/math_utils.py index e11fc625b..6840aad14 100644 --- a/llama_stack/providers/inline/scoring/basic/utils/math_utils.py +++ b/llama_stack/providers/inline/scoring/basic/utils/math_utils.py @@ -5,7 +5,7 @@ # the root directory of this source tree. 
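# functools.cache (Python 3.9+) is an unbounded memoizing decorator equivalent
# to lru_cache(maxsize=None); the ifeval utils above use it so the NLTK
# sentence tokenizer is only loaded once. Minimal illustration:
import functools


@functools.cache
def expensive_load(name: str) -> str:
    print(f"loading {name} ...")  # printed only on the first call
    return name.upper()


expensive_load("punkt")
expensive_load("punkt")  # second call is served from the cache, no reload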
import re -from typing import Sequence +from collections.abc import Sequence from llama_stack.providers.utils.scoring.basic_scoring_utils import time_limit @@ -323,7 +323,7 @@ def _fix_a_slash_b(string: str) -> str: try: ia = int(a) ib = int(b) - assert string == "{}/{}".format(ia, ib) + assert string == f"{ia}/{ib}" new_string = "\\frac{" + str(ia) + "}{" + str(ib) + "}" return new_string except (ValueError, AssertionError): diff --git a/llama_stack/providers/inline/scoring/braintrust/__init__.py b/llama_stack/providers/inline/scoring/braintrust/__init__.py index f1b0112d9..8ea6e9b96 100644 --- a/llama_stack/providers/inline/scoring/braintrust/__init__.py +++ b/llama_stack/providers/inline/scoring/braintrust/__init__.py @@ -3,7 +3,7 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict +from typing import Any from pydantic import BaseModel @@ -18,7 +18,7 @@ class BraintrustProviderDataValidator(BaseModel): async def get_provider_impl( config: BraintrustScoringConfig, - deps: Dict[Api, Any], + deps: dict[Api, Any], ): from .braintrust import BraintrustScoringImpl diff --git a/llama_stack/providers/inline/scoring/braintrust/braintrust.py b/llama_stack/providers/inline/scoring/braintrust/braintrust.py index 3fae83340..d6655d657 100644 --- a/llama_stack/providers/inline/scoring/braintrust/braintrust.py +++ b/llama_stack/providers/inline/scoring/braintrust/braintrust.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. import os -from typing import Any, Dict, List, Optional +from typing import Any from autoevals.llm import Factuality from autoevals.ragas import ( @@ -132,7 +132,7 @@ class BraintrustScoringImpl( async def shutdown(self) -> None: ... 
- async def list_scoring_functions(self) -> List[ScoringFn]: + async def list_scoring_functions(self) -> list[ScoringFn]: scoring_fn_defs_list = list(self.supported_fn_defs_registry.values()) for f in scoring_fn_defs_list: assert f.identifier.startswith("braintrust"), ( @@ -159,7 +159,7 @@ class BraintrustScoringImpl( async def score_batch( self, dataset_id: str, - scoring_functions: Dict[str, Optional[ScoringFnParams]], + scoring_functions: dict[str, ScoringFnParams | None], save_results_dataset: bool = False, ) -> ScoreBatchResponse: await self.set_api_key() @@ -181,9 +181,7 @@ class BraintrustScoringImpl( results=res.results, ) - async def score_row( - self, input_row: Dict[str, Any], scoring_fn_identifier: Optional[str] = None - ) -> ScoringResultRow: + async def score_row(self, input_row: dict[str, Any], scoring_fn_identifier: str | None = None) -> ScoringResultRow: validate_row_schema(input_row, get_valid_schemas(Api.scoring.value)) await self.set_api_key() assert scoring_fn_identifier is not None, "scoring_fn_identifier cannot be None" @@ -203,8 +201,8 @@ class BraintrustScoringImpl( async def score( self, - input_rows: List[Dict[str, Any]], - scoring_functions: Dict[str, Optional[ScoringFnParams]], + input_rows: list[dict[str, Any]], + scoring_functions: dict[str, ScoringFnParams | None], ) -> ScoreResponse: await self.set_api_key() res = {} diff --git a/llama_stack/providers/inline/scoring/braintrust/config.py b/llama_stack/providers/inline/scoring/braintrust/config.py index d4e0d9bcd..4a80f1e4f 100644 --- a/llama_stack/providers/inline/scoring/braintrust/config.py +++ b/llama_stack/providers/inline/scoring/braintrust/config.py @@ -3,19 +3,19 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict, Optional +from typing import Any from pydantic import BaseModel, Field class BraintrustScoringConfig(BaseModel): - openai_api_key: Optional[str] = Field( + openai_api_key: str | None = Field( default=None, description="The OpenAI API Key", ) @classmethod - def sample_run_config(cls, **kwargs) -> Dict[str, Any]: + def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { "openai_api_key": "${env.OPENAI_API_KEY:}", } diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/__init__.py b/llama_stack/providers/inline/scoring/llm_as_judge/__init__.py index 4a83bfe13..88bf10737 100644 --- a/llama_stack/providers/inline/scoring/llm_as_judge/__init__.py +++ b/llama_stack/providers/inline/scoring/llm_as_judge/__init__.py @@ -3,7 +3,7 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict +from typing import Any from llama_stack.distribution.datatypes import Api @@ -12,7 +12,7 @@ from .config import LlmAsJudgeScoringConfig async def get_provider_impl( config: LlmAsJudgeScoringConfig, - deps: Dict[Api, Any], + deps: dict[Api, Any], ): from .scoring import LlmAsJudgeScoringImpl diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/config.py b/llama_stack/providers/inline/scoring/llm_as_judge/config.py index ff63fc5e7..b150ef54c 100644 --- a/llama_stack/providers/inline/scoring/llm_as_judge/config.py +++ b/llama_stack/providers/inline/scoring/llm_as_judge/config.py @@ -3,12 +3,12 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from typing import Any, Dict +from typing import Any from pydantic import BaseModel class LlmAsJudgeScoringConfig(BaseModel): @classmethod - def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> Dict[str, Any]: + def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: return {} diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py b/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py index 7f004fbb6..b705cb9b3 100644 --- a/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +++ b/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py @@ -3,7 +3,7 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict, List, Optional +from typing import Any from llama_stack.apis.datasetio import DatasetIO from llama_stack.apis.datasets import Datasets @@ -50,7 +50,7 @@ class LlmAsJudgeScoringImpl( async def shutdown(self) -> None: ... - async def list_scoring_functions(self) -> List[ScoringFn]: + async def list_scoring_functions(self) -> list[ScoringFn]: scoring_fn_defs_list = self.llm_as_judge_fn.get_supported_scoring_fn_defs() for f in self.llm_as_judge_fn.get_supported_scoring_fn_defs(): @@ -66,7 +66,7 @@ class LlmAsJudgeScoringImpl( async def score_batch( self, dataset_id: str, - scoring_functions: Dict[str, Optional[ScoringFnParams]] = None, + scoring_functions: dict[str, ScoringFnParams | None] = None, save_results_dataset: bool = False, ) -> ScoreBatchResponse: dataset_def = await self.datasets_api.get_dataset(dataset_id=dataset_id) @@ -91,8 +91,8 @@ class LlmAsJudgeScoringImpl( async def score( self, - input_rows: List[Dict[str, Any]], - scoring_functions: Dict[str, Optional[ScoringFnParams]] = None, + input_rows: list[dict[str, Any]], + scoring_functions: dict[str, ScoringFnParams | None] = None, ) -> ScoreResponse: res = {} for scoring_fn_id in scoring_functions.keys(): diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py b/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py index f4e8ab0aa..51cdf6c3f 100644 --- a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +++ b/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. import re -from typing import Any, Dict, Optional +from typing import Any from llama_stack.apis.inference.inference import Inference, UserMessage from llama_stack.apis.scoring import ScoringResultRow @@ -30,9 +30,9 @@ class LlmAsJudgeScoringFn(RegisteredBaseScoringFn): async def score_row( self, - input_row: Dict[str, Any], - scoring_fn_identifier: Optional[str] = None, - scoring_params: Optional[ScoringFnParams] = None, + input_row: dict[str, Any], + scoring_fn_identifier: str | None = None, + scoring_params: ScoringFnParams | None = None, ) -> ScoringResultRow: assert scoring_fn_identifier is not None, "Scoring function identifier not found." 
fn_def = self.supported_fn_defs_registry[scoring_fn_identifier] diff --git a/llama_stack/providers/inline/telemetry/meta_reference/__init__.py b/llama_stack/providers/inline/telemetry/meta_reference/__init__.py index 23468c5d0..09e97136a 100644 --- a/llama_stack/providers/inline/telemetry/meta_reference/__init__.py +++ b/llama_stack/providers/inline/telemetry/meta_reference/__init__.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict +from typing import Any from llama_stack.distribution.datatypes import Api @@ -13,7 +13,7 @@ from .config import TelemetryConfig, TelemetrySink __all__ = ["TelemetryConfig", "TelemetrySink"] -async def get_provider_impl(config: TelemetryConfig, deps: Dict[Api, Any]): +async def get_provider_impl(config: TelemetryConfig, deps: dict[Api, Any]): from .telemetry import TelemetryAdapter impl = TelemetryAdapter(config, deps) diff --git a/llama_stack/providers/inline/telemetry/meta_reference/config.py b/llama_stack/providers/inline/telemetry/meta_reference/config.py index 57312f41f..af53bfd9c 100644 --- a/llama_stack/providers/inline/telemetry/meta_reference/config.py +++ b/llama_stack/providers/inline/telemetry/meta_reference/config.py @@ -5,7 +5,7 @@ # the root directory of this source tree. from enum import Enum -from typing import Any, Dict, List +from typing import Any from pydantic import BaseModel, Field, field_validator @@ -30,10 +30,10 @@ class TelemetryConfig(BaseModel): ) service_name: str = Field( # service name is always the same, use zero-width space to avoid clutter - default="​", + default="", description="The service name to use for telemetry", ) - sinks: List[TelemetrySink] = Field( + sinks: list[TelemetrySink] = Field( default=[TelemetrySink.CONSOLE, TelemetrySink.SQLITE], description="List of telemetry sinks to enable (possible values: otel, sqlite, console)", ) @@ -50,9 +50,9 @@ class TelemetryConfig(BaseModel): return v @classmethod - def sample_run_config(cls, __distro_dir__: str, db_name: str = "trace_store.db") -> Dict[str, Any]: + def sample_run_config(cls, __distro_dir__: str, db_name: str = "trace_store.db") -> dict[str, Any]: return { - "service_name": "${env.OTEL_SERVICE_NAME:​}", + "service_name": "${env.OTEL_SERVICE_NAME:}", "sinks": "${env.TELEMETRY_SINKS:console,sqlite}", - "sqlite_db_path": "${env.SQLITE_DB_PATH:" + __distro_dir__ + "/" + db_name + "}", + "sqlite_db_path": "${env.SQLITE_STORE_DIR:" + __distro_dir__ + "}/" + db_name, } diff --git a/llama_stack/providers/inline/telemetry/meta_reference/console_span_processor.py b/llama_stack/providers/inline/telemetry/meta_reference/console_span_processor.py index b909d32ef..ff1914c15 100644 --- a/llama_stack/providers/inline/telemetry/meta_reference/console_span_processor.py +++ b/llama_stack/providers/inline/telemetry/meta_reference/console_span_processor.py @@ -78,7 +78,7 @@ class ConsoleSpanProcessor(SpanProcessor): severity = event.attributes.get("severity", "info") message = event.attributes.get("message", event.name) - if isinstance(message, (dict, list)): + if isinstance(message, dict | list): message = json.dumps(message, indent=2) severity_colors = { diff --git a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py index 9b23c8229..0f6cf8619 100644 --- a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +++ 
b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py @@ -5,7 +5,7 @@ # the root directory of this source tree. import threading -from typing import Any, Dict, List, Optional +from typing import Any from opentelemetry import metrics, trace from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter @@ -16,11 +16,15 @@ from opentelemetry.sdk.resources import Resource from opentelemetry.sdk.trace import TracerProvider from opentelemetry.sdk.trace.export import BatchSpanProcessor from opentelemetry.semconv.resource import ResourceAttributes +from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator from llama_stack.apis.telemetry import ( Event, MetricEvent, + MetricLabelMatcher, + MetricQueryType, QueryCondition, + QueryMetricsResponse, QuerySpanTreeResponse, QueryTracesResponse, Span, @@ -41,6 +45,7 @@ from llama_stack.providers.inline.telemetry.meta_reference.sqlite_span_processor ) from llama_stack.providers.utils.telemetry.dataset_mixin import TelemetryDatasetMixin from llama_stack.providers.utils.telemetry.sqlite_trace_store import SQLiteTraceStore +from llama_stack.providers.utils.telemetry.tracing import ROOT_SPAN_MARKERS from .config import TelemetryConfig, TelemetrySink @@ -60,7 +65,7 @@ def is_tracing_enabled(tracer): class TelemetryAdapter(TelemetryDatasetMixin, Telemetry): - def __init__(self, config: TelemetryConfig, deps: Dict[Api, Any]) -> None: + def __init__(self, config: TelemetryConfig, deps: dict[Api, Any]) -> None: self.config = config self.datasetio_api = deps.get(Api.datasetio) self.meter = None @@ -123,6 +128,17 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry): else: raise ValueError(f"Unknown event type: {event}") + async def query_metrics( + self, + metric_name: str, + start_time: int, + end_time: int | None = None, + granularity: str | None = "1d", + query_type: MetricQueryType = MetricQueryType.RANGE, + label_matchers: list[MetricLabelMatcher] | None = None, + ) -> QueryMetricsResponse: + raise NotImplementedError("Querying metrics is not implemented") + def _log_unstructured(self, event: UnstructuredLogEvent, ttl_seconds: int) -> None: with self._lock: # Use global storage instead of instance storage @@ -132,7 +148,7 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry): if span: timestamp_ns = int(event.timestamp.timestamp() * 1e9) span.add_event( - name=event.type, + name=event.type.value, attributes={ "message": event.message, "severity": event.severity.value, @@ -192,6 +208,15 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry): event.attributes = {} event.attributes["__ttl__"] = ttl_seconds + # Extract these W3C trace context attributes so they are not written to + # underlying storage, as we just need them to propagate the trace context. + traceparent = event.attributes.pop("traceparent", None) + tracestate = event.attributes.pop("tracestate", None) + if traceparent: + # If we have a traceparent header value, we're not the root span. 
+ for root_attribute in ROOT_SPAN_MARKERS: + event.attributes.pop(root_attribute, None) + if isinstance(event.payload, SpanStartPayload): # Check if span already exists to prevent duplicates if span_id in _GLOBAL_STORAGE["active_spans"]: @@ -202,8 +227,12 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry): parent_span_id = int(event.payload.parent_span_id, 16) parent_span = _GLOBAL_STORAGE["active_spans"].get(parent_span_id) context = trace.set_span_in_context(parent_span) - else: - event.attributes["__root_span__"] = "true" + elif traceparent: + carrier = { + "traceparent": traceparent, + "tracestate": tracestate, + } + context = TraceContextTextMapPropagator().extract(carrier=carrier) span = tracer.start_span( name=event.payload.name, @@ -231,10 +260,10 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry): async def query_traces( self, - attribute_filters: Optional[List[QueryCondition]] = None, - limit: Optional[int] = 100, - offset: Optional[int] = 0, - order_by: Optional[List[str]] = None, + attribute_filters: list[QueryCondition] | None = None, + limit: int | None = 100, + offset: int | None = 0, + order_by: list[str] | None = None, ) -> QueryTracesResponse: return QueryTracesResponse( data=await self.trace_store.query_traces( @@ -254,8 +283,8 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry): async def get_span_tree( self, span_id: str, - attributes_to_return: Optional[List[str]] = None, - max_depth: Optional[int] = None, + attributes_to_return: list[str] | None = None, + max_depth: int | None = None, ) -> QuerySpanTreeResponse: return QuerySpanTreeResponse( data=await self.trace_store.get_span_tree( diff --git a/llama_stack/providers/inline/tool_runtime/code_interpreter/__init__.py b/llama_stack/providers/inline/tool_runtime/code_interpreter/__init__.py deleted file mode 100644 index 8317ce793..000000000 --- a/llama_stack/providers/inline/tool_runtime/code_interpreter/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Any, Dict - -from .config import CodeInterpreterToolConfig - -__all__ = ["CodeInterpreterToolConfig", "CodeInterpreterToolRuntimeImpl"] - - -async def get_provider_impl(config: CodeInterpreterToolConfig, _deps: Dict[str, Any]): - from .code_interpreter import CodeInterpreterToolRuntimeImpl - - impl = CodeInterpreterToolRuntimeImpl(config) - await impl.initialize() - return impl diff --git a/llama_stack/providers/inline/tool_runtime/code_interpreter/code_env_prefix.py b/llama_stack/providers/inline/tool_runtime/code_interpreter/code_env_prefix.py deleted file mode 100644 index 9c5f642ea..000000000 --- a/llama_stack/providers/inline/tool_runtime/code_interpreter/code_env_prefix.py +++ /dev/null @@ -1,131 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
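# The telemetry hunk above starts non-root spans from an incoming W3C
# traceparent/tracestate pair via TraceContextTextMapPropagator. A small
# end-to-end sketch of that propagation (assumes opentelemetry-sdk installed):
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator

trace.set_tracer_provider(TracerProvider())
tracer = trace.get_tracer(__name__)
propagator = TraceContextTextMapPropagator()

# Producer side: serialize the current span context into a carrier dict.
carrier: dict[str, str] = {}
with tracer.start_as_current_span("client-request"):
    propagator.inject(carrier)
print(carrier["traceparent"])  # e.g. 00-<trace_id>-<span_id>-01

# Consumer side (what the adapter does): rebuild the remote context and start
# a child span under it instead of marking a brand-new root span.
ctx = propagator.extract(carrier=carrier)
child = tracer.start_span("server-handler", context=ctx)
child.end()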
- -import errno - -# Disabling potentially dangerous functions -import os as _os -from functools import partial - -os_funcs_to_disable = [ - "kill", - "system", - "putenv", - "remove", - "removedirs", - "rmdir", - "fchdir", - "setuid", - "fork", - "forkpty", - "killpg", - "rename", - "renames", - "truncate", - "replace", - # "unlink", # Commenting as this was blocking matpltlib from rendering plots correctly - "fchmod", - "fchown", - "chmod", - "chown", - "chroot", - "fchdir", - "lchflags", - "lchmod", - "lchown", - "chdir", -] - - -def call_not_allowed(*args, **kwargs): - raise OSError(errno.EPERM, "Call are not permitted in this environment") - - -for func_name in os_funcs_to_disable: - if hasattr(_os, func_name): - setattr(_os, func_name, partial(call_not_allowed, _func_name=f"os.{func_name}")) - -import shutil as _shutil - -for func_name in ["rmtree", "move", "chown"]: - if hasattr(_shutil, func_name): - setattr( - _shutil, - func_name, - partial(call_not_allowed, _func_name=f"shutil.{func_name}"), - ) - -import subprocess as _subprocess - - -def popen_not_allowed(*args, **kwargs): - raise _subprocess.CalledProcessError( - -1, - args[0] if args else "unknown", - stderr="subprocess.Popen is not allowed in this environment", - ) - - -_subprocess.Popen = popen_not_allowed # type: ignore - - -import atexit as _atexit -import builtins as _builtins -import io as _io -import json as _json -import sys as _sys - -# NB! The following "unused" imports crucial, make sure not not to remove -# them with linters - they're used in code_execution.py -from contextlib import ( # noqa - contextmanager as _contextmanager, -) -from multiprocessing.connection import Connection as _Connection - -# Mangle imports to avoid polluting model execution namespace. - -_IO_SINK = _io.StringIO() -_NETWORK_TIMEOUT = 5 -_NETWORK_CONNECTIONS = None - - -def _open_connections(): - global _NETWORK_CONNECTIONS - if _NETWORK_CONNECTIONS is not None: - # Ensure connections only opened once. - return _NETWORK_CONNECTIONS - req_w_fd, resp_r_fd = _sys.argv[1], _sys.argv[2] - req_con = _Connection(int(req_w_fd), readable=False) - resp_con = _Connection(int(resp_r_fd), writable=False) - _NETWORK_CONNECTIONS = (req_con, resp_con) - return _NETWORK_CONNECTIONS - - -_builtins._open_connections = _open_connections # type: ignore - - -@_atexit.register -def _close_connections(): - global _NETWORK_CONNECTIONS - if _NETWORK_CONNECTIONS is None: - return - for con in _NETWORK_CONNECTIONS: - con.close() - del _NETWORK_CONNECTIONS - - -def _network_call(request): - # NOTE: We communicate with the parent process in json, encoded - # in raw bytes. We do this because native send/recv methods use - # pickle which involves execution of arbitrary code. - _open_connections() - req_con, resp_con = _NETWORK_CONNECTIONS - - req_con.send_bytes(_json.dumps(request).encode("utf-8")) - if resp_con.poll(timeout=_NETWORK_TIMEOUT) is None: - raise Exception(f"Network request timed out: {_json.dumps(request)}") - else: - return _json.loads(resp_con.recv_bytes().decode("utf-8")) diff --git a/llama_stack/providers/inline/tool_runtime/code_interpreter/code_execution.py b/llama_stack/providers/inline/tool_runtime/code_interpreter/code_execution.py deleted file mode 100644 index 6106cf741..000000000 --- a/llama_stack/providers/inline/tool_runtime/code_interpreter/code_execution.py +++ /dev/null @@ -1,257 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. 
-# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import base64 -import json -import multiprocessing -import os -import re -import subprocess -import sys -import tempfile -import textwrap -import time -from dataclasses import dataclass -from datetime import datetime -from io import BytesIO -from pathlib import Path -from typing import List - -from PIL import Image - -from .utils import get_code_env_prefix - -TOOLS_ATTACHMENT_KEY = "__tools_attachment__" -TOOLS_ATTACHMENT_KEY_REGEX = re.compile(r"__tools_attachment__=(\{.*?\})") - -DIRNAME = Path(__file__).parent - -CODE_EXEC_TIMEOUT = 20 -CODE_ENV_PREFIX = get_code_env_prefix() - -STDOUTERR_SINK_WRAPPER_TEMPLATE = """\ -with _redirect_stdout(_IO_SINK), _redirect_stderr(_IO_SINK): -{code}\ -""" - -TRYEXCEPT_WRAPPER_TEMPLATE = """\ -try: -{code} -except: - pass\ -""" - - -def generate_bwrap_command(bind_dirs: List[str]) -> str: - """ - Generate the bwrap command string for binding all - directories in the current directory read-only. - """ - bwrap_args = "" - bwrap_args += "--ro-bind / / " - # Add the --dev flag to mount device files - bwrap_args += "--dev /dev " - for d in bind_dirs: - bwrap_args += f"--bind {d} {d} " - - # Add the --unshare-all flag to isolate the sandbox from the rest of the system - bwrap_args += "--unshare-all " - # Add the --die-with-parent flag to ensure the child process dies when bwrap's parent dies - bwrap_args += "--die-with-parent " - return bwrap_args - - -@dataclass -class CodeExecutionContext: - matplotlib_dump_dir: str - - -@dataclass -class CodeExecutionRequest: - scripts: List[str] - only_last_cell_stdouterr: bool = True - only_last_cell_fail: bool = True - seed: int = 0 - strip_fpaths_in_stderr: bool = True - use_bwrap: bool = True - - -class CodeExecutor: - def __init__(self, context: CodeExecutionContext): - self.context = context - - def execute(self, req: CodeExecutionRequest) -> dict: - scripts = req.scripts - for i in range(len(scripts) - 1): - if req.only_last_cell_stdouterr: - scripts[i] = STDOUTERR_SINK_WRAPPER_TEMPLATE.format(code=textwrap.indent(scripts[i], " " * 4)) - if req.only_last_cell_fail: - scripts[i] = TRYEXCEPT_WRAPPER_TEMPLATE.format(code=textwrap.indent(scripts[i], " " * 4)) - - # Seeds prefix: - seed = req.seed - seeds_prefix = f"""\ -def _set_seeds(): - import random - random.seed({seed}) - import numpy as np - np.random.seed({seed}) -_set_seeds()\ -""" - - script = "\n\n".join([seeds_prefix] + [CODE_ENV_PREFIX] + scripts) - with tempfile.TemporaryDirectory() as dpath: - code_fpath = os.path.join(dpath, "code.py") - with open(code_fpath, "w") as f: - f.write(script) - - try: - python_path = os.environ.get("PYTHONPATH", "") - env = dict( - os.environ, - PYTHONHASHSEED=str(seed), - MPLCONFIGDIR=dpath, - MPLBACKEND="module://matplotlib_custom_backend", - PYTHONPATH=f"{DIRNAME}:{python_path}", - ) - - if req.use_bwrap: - bwrap_prefix = "bwrap " + generate_bwrap_command(bind_dirs=[dpath]) - cmd = [*bwrap_prefix.split(), sys.executable, "-c", script] - else: - cmd = [sys.executable, "-c", script] - - stdout, stderr, returncode = do_subprocess( - cmd=cmd, - env=env, - ctx=self.context, - ) - - stderr = stderr.strip() - if req.strip_fpaths_in_stderr: - pattern = r'File "([^"]+)", line (\d+)' - stderr = re.sub(pattern, r"line \2", stderr) - - return { - "process_status": "completed", - "returncode": returncode, - "stdout": stdout.strip(), - "stderr": stderr, - } - - except subprocess.TimeoutExpired: - return { - 
"process_status": "timeout", - "stdout": "Timed out", - "stderr": "Timed out", - } - - except Exception as e: - return { - "process_status": "error", - "error_type": type(e).__name__, - "stderr": str(e), - "stdout": str(e), - } - - -def process_matplotlib_response(response, matplotlib_dump_dir: str): - image_data = response["image_data"] - # Convert the base64 string to a bytes object - images_raw = [base64.b64decode(d["image_base64"]) for d in image_data] - # Create a list of PIL images from the bytes objects - images = [Image.open(BytesIO(img)) for img in images_raw] - # Create a list of image paths - image_paths = [] - for i, img in enumerate(images): - # create new directory for each day to better organize data: - dump_dname = datetime.today().strftime("%Y-%m-%d") # noqa: DTZ002 - we don't care about timezones here since we are displaying the date - dump_dpath = Path(matplotlib_dump_dir, dump_dname) - dump_dpath.mkdir(parents=True, exist_ok=True) - # save image into a file - dump_fname = f"matplotlib_{str(time.time()).replace('.', '_')}_{i}.png" - dump_fpath = dump_dpath / dump_fname - img.save(dump_fpath, "PNG") - image_paths.append(str(dump_fpath)) - - # this is kind of convoluted, we send back this response to the subprocess which - # prints it out - info = { - "filepath": str(image_paths[-1]), - "mimetype": "image/png", - } - return f"{TOOLS_ATTACHMENT_KEY}={json.dumps(info)}" - - -def execute_subprocess_request(request, ctx: CodeExecutionContext): - "Route requests from the subprocess (via network Pipes) to the internet/tools." - if request["type"] == "matplotlib": - return process_matplotlib_response(request, ctx.matplotlib_dump_dir) - else: - raise Exception(f"Unrecognised network request type: {request['type']}") - - -def do_subprocess(*, cmd: list, env: dict, ctx: CodeExecutionContext): - # Create Pipes to be used for any external tool/network requests. - req_r, req_w = multiprocessing.Pipe(duplex=False) - resp_r, resp_w = multiprocessing.Pipe(duplex=False) - - cmd += [str(req_w.fileno()), str(resp_r.fileno())] - proc = subprocess.Popen( - cmd, - pass_fds=(req_w.fileno(), resp_r.fileno()), - text=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - close_fds=True, - env=env, - ) - - # Close unnecessary fds. - req_w.close() - resp_r.close() - - pipe_close = False - done_read = False - start = time.monotonic() - while proc.poll() is None and not pipe_close: - if req_r.poll(0.1): - # NB: Python pipe semantics for poll and recv mean that - # poll() returns True is a pipe is closed. - # CF old school PEP from '09 - # https://bugs.python.org/issue5573 - try: - request = json.loads(req_r.recv_bytes().decode("utf-8")) - response = execute_subprocess_request(request, ctx) - - resp_w.send_bytes(json.dumps(response).encode("utf-8")) - except EOFError: - # The request pipe is closed - set a marker to exit - # after the next attempt at reading stdout/stderr. - pipe_close = True - - try: - # If lots has been printed, pipe might be full but - # proc cannot exit until all the stdout/stderr - # been written/read. - stdout, stderr = proc.communicate(timeout=0.3) - done_read = True - except subprocess.TimeoutExpired: - # The program has not terminated. Ignore it, there - # may be more network/tool requests. - continue - if time.monotonic() - start > CODE_EXEC_TIMEOUT: - proc.terminate() - raise subprocess.TimeoutExpired(cmd, CODE_EXEC_TIMEOUT) - - if not done_read: - # Solve race condition where process terminates before - # we hit the while loop. 
- stdout, stderr = proc.communicate(timeout=0.3) - - resp_w.close() - req_r.close() - return stdout, stderr, proc.returncode diff --git a/llama_stack/providers/inline/tool_runtime/code_interpreter/code_interpreter.py b/llama_stack/providers/inline/tool_runtime/code_interpreter/code_interpreter.py deleted file mode 100644 index 10ac2fcc6..000000000 --- a/llama_stack/providers/inline/tool_runtime/code_interpreter/code_interpreter.py +++ /dev/null @@ -1,80 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - - -import asyncio -import logging -import os -import tempfile -from typing import Any, Dict, Optional - -from llama_stack.apis.common.content_types import URL -from llama_stack.apis.tools import ( - ListToolDefsResponse, - Tool, - ToolDef, - ToolInvocationResult, - ToolParameter, - ToolRuntime, -) -from llama_stack.providers.datatypes import ToolsProtocolPrivate - -from .code_execution import CodeExecutionContext, CodeExecutionRequest, CodeExecutor -from .config import CodeInterpreterToolConfig - -log = logging.getLogger(__name__) - - -class CodeInterpreterToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime): - def __init__(self, config: CodeInterpreterToolConfig): - self.config = config - ctx = CodeExecutionContext( - matplotlib_dump_dir=tempfile.mkdtemp(), - ) - self.code_executor = CodeExecutor(ctx) - - async def initialize(self): - pass - - async def register_tool(self, tool: Tool) -> None: - pass - - async def unregister_tool(self, tool_id: str) -> None: - return - - async def list_runtime_tools( - self, tool_group_id: Optional[str] = None, mcp_endpoint: Optional[URL] = None - ) -> ListToolDefsResponse: - return ListToolDefsResponse( - data=[ - ToolDef( - name="code_interpreter", - description="Execute code", - parameters=[ - ToolParameter( - name="code", - description="The code to execute", - parameter_type="string", - ), - ], - ) - ] - ) - - async def invoke_tool(self, tool_name: str, kwargs: Dict[str, Any]) -> ToolInvocationResult: - script = kwargs["code"] - # Use environment variable to control bwrap usage - force_disable_bwrap = os.environ.get("DISABLE_CODE_SANDBOX", "").lower() in ("1", "true", "yes") - req = CodeExecutionRequest(scripts=[script], use_bwrap=not force_disable_bwrap) - res = await asyncio.to_thread(self.code_executor.execute, req) - pieces = [res["process_status"]] - for out_type in ["stdout", "stderr"]: - res_out = res[out_type] - if res_out != "": - pieces.extend([f"[{out_type}]", res_out, f"[/{out_type}]"]) - if out_type == "stderr": - log.error(f"ipython tool error: ↓\n{res_out}") - return ToolInvocationResult(content="\n".join(pieces)) diff --git a/llama_stack/providers/inline/tool_runtime/code_interpreter/config.py b/llama_stack/providers/inline/tool_runtime/code_interpreter/config.py deleted file mode 100644 index 7de1ec453..000000000 --- a/llama_stack/providers/inline/tool_runtime/code_interpreter/config.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from typing import Any, Dict - -from pydantic import BaseModel - - -class CodeInterpreterToolConfig(BaseModel): - @classmethod - def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> Dict[str, Any]: - return {} diff --git a/llama_stack/providers/inline/tool_runtime/code_interpreter/matplotlib_custom_backend.py b/llama_stack/providers/inline/tool_runtime/code_interpreter/matplotlib_custom_backend.py deleted file mode 100644 index 6454358a5..000000000 --- a/llama_stack/providers/inline/tool_runtime/code_interpreter/matplotlib_custom_backend.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -""" -A custom Matplotlib backend that overrides the show method to return image bytes. -""" - -import base64 -import io -import json as _json -import logging - -import matplotlib -from matplotlib.backend_bases import FigureManagerBase - -# Import necessary components from Matplotlib -from matplotlib.backends.backend_agg import FigureCanvasAgg - -log = logging.getLogger(__name__) - - -class CustomFigureCanvas(FigureCanvasAgg): - def show(self): - # Save the figure to a BytesIO object - buf = io.BytesIO() - self.print_png(buf) - image_bytes = buf.getvalue() - buf.close() - return image_bytes - - -class CustomFigureManager(FigureManagerBase): - def __init__(self, canvas, num): - super().__init__(canvas, num) - - -# Mimic module initialization that integrates with the Matplotlib backend system -def _create_figure_manager(num, *args, **kwargs): - """ - Create a custom figure manager instance. - """ - FigureClass = kwargs.pop("FigureClass", None) # noqa: N806 - if FigureClass is None: - from matplotlib.figure import Figure - - FigureClass = Figure # noqa: N806 - fig = FigureClass(*args, **kwargs) - canvas = CustomFigureCanvas(fig) - manager = CustomFigureManager(canvas, num) - return manager - - -def show(): - """ - Handle all figures and potentially return their images as bytes. - - This function iterates over all figures registered with the custom backend, - renders them as images in bytes format, and could return a list of bytes objects, - one for each figure, or handle them as needed. - """ - image_data = [] - for manager in matplotlib._pylab_helpers.Gcf.get_all_fig_managers(): - # Get the figure from the manager - fig = manager.canvas.figure - buf = io.BytesIO() # Create a buffer for the figure - fig.savefig(buf, format="png") # Save the figure to the buffer in PNG format - buf.seek(0) # Go to the beginning of the buffer - image_bytes = buf.getvalue() # Retrieve bytes value - image_base64 = base64.b64encode(image_bytes).decode("utf-8") - image_data.append({"image_base64": image_base64}) - buf.close() - - # The _open_connections method is dynamically made available to - # the interpreter by bundling code from "code_env_prefix.py" -- by literally prefixing it -- and - # then "eval"ing it within a sandboxed interpreter. 
- req_con, resp_con = _open_connections() # noqa: F821 - - _json_dump = _json.dumps( - { - "type": "matplotlib", - "image_data": image_data, - } - ) - req_con.send_bytes(_json_dump.encode("utf-8")) - resp = _json.loads(resp_con.recv_bytes().decode("utf-8")) - log.info(resp) - - -FigureCanvas = CustomFigureCanvas -FigureManager = CustomFigureManager diff --git a/llama_stack/providers/inline/tool_runtime/code_interpreter/utils.py b/llama_stack/providers/inline/tool_runtime/code_interpreter/utils.py deleted file mode 100644 index d6f539a39..000000000 --- a/llama_stack/providers/inline/tool_runtime/code_interpreter/utils.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import os - -DIR = os.path.dirname(os.path.realpath(__file__)) -CODE_ENV_PREFIX_FILE = os.path.join(DIR, "code_env_prefix.py") -CODE_ENV_PREFIX = None - - -def get_code_env_prefix() -> str: - global CODE_ENV_PREFIX - - if CODE_ENV_PREFIX is None: - with open(CODE_ENV_PREFIX_FILE, "r") as f: - CODE_ENV_PREFIX = f.read() - - return CODE_ENV_PREFIX diff --git a/llama_stack/providers/inline/tool_runtime/rag/__init__.py b/llama_stack/providers/inline/tool_runtime/rag/__init__.py index 0ef3c35e9..f9a6e5c55 100644 --- a/llama_stack/providers/inline/tool_runtime/rag/__init__.py +++ b/llama_stack/providers/inline/tool_runtime/rag/__init__.py @@ -4,14 +4,14 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict +from typing import Any from llama_stack.providers.datatypes import Api from .config import RagToolRuntimeConfig -async def get_provider_impl(config: RagToolRuntimeConfig, deps: Dict[Api, Any]): +async def get_provider_impl(config: RagToolRuntimeConfig, deps: dict[Api, Any]): from .memory import MemoryToolRuntimeImpl impl = MemoryToolRuntimeImpl(config, deps[Api.vector_io], deps[Api.inference]) diff --git a/llama_stack/providers/inline/tool_runtime/rag/config.py b/llama_stack/providers/inline/tool_runtime/rag/config.py index c75c3fc51..43ba78e65 100644 --- a/llama_stack/providers/inline/tool_runtime/rag/config.py +++ b/llama_stack/providers/inline/tool_runtime/rag/config.py @@ -4,12 +4,12 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from typing import Any, Dict +from typing import Any from pydantic import BaseModel class RagToolRuntimeConfig(BaseModel): @classmethod - def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> Dict[str, Any]: + def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: return {} diff --git a/llama_stack/providers/inline/tool_runtime/rag/memory.py b/llama_stack/providers/inline/tool_runtime/rag/memory.py index 97c53d454..4776d47d0 100644 --- a/llama_stack/providers/inline/tool_runtime/rag/memory.py +++ b/llama_stack/providers/inline/tool_runtime/rag/memory.py @@ -8,7 +8,7 @@ import asyncio import logging import secrets import string -from typing import Any, Dict, List, Optional +from typing import Any from pydantic import TypeAdapter @@ -25,14 +25,15 @@ from llama_stack.apis.tools import ( RAGQueryConfig, RAGQueryResult, RAGToolRuntime, - Tool, ToolDef, + ToolGroup, ToolInvocationResult, ToolParameter, ToolRuntime, ) from llama_stack.apis.vector_io import QueryChunksResponse, VectorIO -from llama_stack.providers.datatypes import ToolsProtocolPrivate +from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate +from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str from llama_stack.providers.utils.memory.vector_store import ( content_from_doc, make_overlapped_chunks, @@ -48,7 +49,7 @@ def make_random_string(length: int = 8): return "".join(secrets.choice(string.ascii_letters + string.digits) for _ in range(length)) -class MemoryToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime, RAGToolRuntime): +class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRuntime): def __init__( self, config: RagToolRuntimeConfig, @@ -65,15 +66,15 @@ class MemoryToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime, RAGToolRuntime): async def shutdown(self): pass - async def register_tool(self, tool: Tool) -> None: + async def register_toolgroup(self, toolgroup: ToolGroup) -> None: pass - async def unregister_tool(self, tool_id: str) -> None: + async def unregister_toolgroup(self, toolgroup_id: str) -> None: return async def insert( self, - documents: List[RAGDocument], + documents: list[RAGDocument], vector_db_id: str, chunk_size_in_tokens: int = 512, ) -> None: @@ -86,6 +87,7 @@ class MemoryToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime, RAGToolRuntime): content, chunk_size_in_tokens, chunk_size_in_tokens // 4, + doc.metadata, ) ) @@ -100,11 +102,13 @@ class MemoryToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime, RAGToolRuntime): async def query( self, content: InterleavedContent, - vector_db_ids: List[str], - query_config: Optional[RAGQueryConfig] = None, + vector_db_ids: list[str], + query_config: RAGQueryConfig | None = None, ) -> RAGQueryResult: if not vector_db_ids: - return RAGQueryResult(content=None) + raise ValueError( + "No vector DBs were provided to the knowledge search tool. Please provide at least one vector DB ID." 
+ ) query_config = query_config or RAGQueryConfig() query = await generate_rag_query( @@ -118,11 +122,12 @@ class MemoryToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime, RAGToolRuntime): query=query, params={ "max_chunks": query_config.max_chunks, + "mode": query_config.mode, }, ) for vector_db_id in vector_db_ids ] - results: List[QueryChunksResponse] = await asyncio.gather(*tasks) + results: list[QueryChunksResponse] = await asyncio.gather(*tasks) chunks = [c for r in results for c in r.chunks] scores = [s for r in results for s in r.scores] @@ -139,20 +144,27 @@ class MemoryToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime, RAGToolRuntime): text=f"knowledge_search tool found {len(chunks)} chunks:\nBEGIN of knowledge_search tool results.\n" ) ] - for i, c in enumerate(chunks): - metadata = c.metadata - tokens += metadata["token_count"] + for i, chunk in enumerate(chunks): + metadata = chunk.metadata + tokens += metadata.get("token_count", 0) + tokens += metadata.get("metadata_token_count", 0) + if tokens > query_config.max_tokens_in_context: log.error( f"Using {len(picked)} chunks; reached max tokens in context: {tokens}", ) break - picked.append( - TextContentItem( - text=f"Result {i + 1}:\nDocument_id:{metadata['document_id'][:5]}\nContent: {c.content}\n", - ) - ) + + metadata_subset = {k: v for k, v in metadata.items() if k not in ["token_count", "metadata_token_count"]} + text_content = query_config.chunk_template.format(index=i + 1, chunk=chunk, metadata=metadata_subset) + picked.append(TextContentItem(text=text_content)) + picked.append(TextContentItem(text="END of knowledge_search tool results.\n")) + picked.append( + TextContentItem( + text=f'The above results were retrieved to help answer the user\'s query: "{interleaved_content_as_str(content)}". Use them as supporting information only in answering this query.\n', + ) + ) return RAGQueryResult( content=picked, @@ -162,7 +174,7 @@ class MemoryToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime, RAGToolRuntime): ) async def list_runtime_tools( - self, tool_group_id: Optional[str] = None, mcp_endpoint: Optional[URL] = None + self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None ) -> ListToolDefsResponse: # Parameters are not listed since these methods are not yet invoked automatically # by the LLM. The method is only implemented so things like /tools can list without @@ -187,7 +199,7 @@ class MemoryToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime, RAGToolRuntime): ] ) - async def invoke_tool(self, tool_name: str, kwargs: Dict[str, Any]) -> ToolInvocationResult: + async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: vector_db_ids = kwargs.get("vector_db_ids", []) query_config = kwargs.get("query_config") if query_config: diff --git a/llama_stack/providers/inline/vector_io/chroma/__init__.py b/llama_stack/providers/inline/vector_io/chroma/__init__.py index f39188b46..2e0efb8a1 100644 --- a/llama_stack/providers/inline/vector_io/chroma/__init__.py +++ b/llama_stack/providers/inline/vector_io/chroma/__init__.py @@ -4,14 +4,14 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from typing import Any, Dict +from typing import Any from llama_stack.providers.datatypes import Api from .config import ChromaVectorIOConfig -async def get_provider_impl(config: ChromaVectorIOConfig, deps: Dict[Api, Any]): +async def get_provider_impl(config: ChromaVectorIOConfig, deps: dict[Api, Any]): from llama_stack.providers.remote.vector_io.chroma.chroma import ( ChromaVectorIOAdapter, ) diff --git a/llama_stack/providers/inline/vector_io/chroma/config.py b/llama_stack/providers/inline/vector_io/chroma/config.py index 1e333fe92..81e2f289e 100644 --- a/llama_stack/providers/inline/vector_io/chroma/config.py +++ b/llama_stack/providers/inline/vector_io/chroma/config.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict +from typing import Any from pydantic import BaseModel @@ -13,5 +13,5 @@ class ChromaVectorIOConfig(BaseModel): db_path: str @classmethod - def sample_run_config(cls, db_path: str = "${env.CHROMADB_PATH}", **kwargs: Any) -> Dict[str, Any]: + def sample_run_config(cls, db_path: str = "${env.CHROMADB_PATH}", **kwargs: Any) -> dict[str, Any]: return {"db_path": db_path} diff --git a/llama_stack/providers/inline/vector_io/faiss/__init__.py b/llama_stack/providers/inline/vector_io/faiss/__init__.py index fc8ce70b4..68a1dee66 100644 --- a/llama_stack/providers/inline/vector_io/faiss/__init__.py +++ b/llama_stack/providers/inline/vector_io/faiss/__init__.py @@ -4,14 +4,14 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict +from typing import Any from llama_stack.providers.datatypes import Api from .config import FaissVectorIOConfig -async def get_provider_impl(config: FaissVectorIOConfig, deps: Dict[Api, Any]): +async def get_provider_impl(config: FaissVectorIOConfig, deps: dict[Api, Any]): from .faiss import FaissVectorIOAdapter assert isinstance(config, FaissVectorIOConfig), f"Unexpected config type: {type(config)}" diff --git a/llama_stack/providers/inline/vector_io/faiss/config.py b/llama_stack/providers/inline/vector_io/faiss/config.py index fa6e5bede..cbcbb1762 100644 --- a/llama_stack/providers/inline/vector_io/faiss/config.py +++ b/llama_stack/providers/inline/vector_io/faiss/config.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from typing import Any, Dict +from typing import Any from pydantic import BaseModel @@ -20,7 +20,7 @@ class FaissVectorIOConfig(BaseModel): kvstore: KVStoreConfig @classmethod - def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> Dict[str, Any]: + def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: return { "kvstore": SqliteKVStoreConfig.sample_run_config( __distro_dir__=__distro_dir__, diff --git a/llama_stack/providers/inline/vector_io/faiss/faiss.py b/llama_stack/providers/inline/vector_io/faiss/faiss.py index 20c795650..47256d88d 100644 --- a/llama_stack/providers/inline/vector_io/faiss/faiss.py +++ b/llama_stack/providers/inline/vector_io/faiss/faiss.py @@ -9,7 +9,7 @@ import base64 import io import json import logging -from typing import Any, Dict, List, Optional +from typing import Any import faiss import numpy as np @@ -84,7 +84,7 @@ class FaissIndex(EmbeddingIndex): await self.kvstore.delete(f"{FAISS_INDEX_PREFIX}{self.bank_id}") - async def add_chunks(self, chunks: List[Chunk], embeddings: NDArray): + async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray): # Add dimension check embedding_dim = embeddings.shape[1] if len(embeddings.shape) > 1 else embeddings.shape[0] if embedding_dim != self.index.d: @@ -99,9 +99,13 @@ class FaissIndex(EmbeddingIndex): # Save updated index await self._save_index() - async def query(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse: + async def query_vector( + self, + embedding: NDArray, + k: int, + score_threshold: float, + ) -> QueryChunksResponse: distances, indices = await asyncio.to_thread(self.index.search, embedding.reshape(1, -1).astype(np.float32), k) - chunks = [] scores = [] for d, i in zip(distances[0], indices[0], strict=False): @@ -112,6 +116,14 @@ class FaissIndex(EmbeddingIndex): return QueryChunksResponse(chunks=chunks, scores=scores) + async def query_keyword( + self, + query_string: str, + k: int, + score_threshold: float, + ) -> QueryChunksResponse: + raise NotImplementedError("Keyword search is not supported in FAISS") + class FaissVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate): def __init__(self, config: FaissVectorIOConfig, inference_api: Inference) -> None: @@ -125,7 +137,7 @@ class FaissVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate): # Load existing banks from kvstore start_key = VECTOR_DBS_PREFIX end_key = f"{VECTOR_DBS_PREFIX}\xff" - stored_vector_dbs = await self.kvstore.range(start_key, end_key) + stored_vector_dbs = await self.kvstore.values_in_range(start_key, end_key) for vector_db_data in stored_vector_dbs: vector_db = VectorDB.model_validate_json(vector_db_data) @@ -159,7 +171,7 @@ class FaissVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate): inference_api=self.inference_api, ) - async def list_vector_dbs(self) -> List[VectorDB]: + async def list_vector_dbs(self) -> list[VectorDB]: return [i.vector_db for i in self.cache.values()] async def unregister_vector_db(self, vector_db_id: str) -> None: @@ -176,8 +188,8 @@ class FaissVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate): async def insert_chunks( self, vector_db_id: str, - chunks: List[Chunk], - ttl_seconds: Optional[int] = None, + chunks: list[Chunk], + ttl_seconds: int | None = None, ) -> None: index = self.cache.get(vector_db_id) if index is None: @@ -189,7 +201,7 @@ class FaissVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate): self, vector_db_id: str, query: InterleavedContent, - params: Optional[Dict[str, Any]] = None, + params: dict[str, Any] | None = 
None, ) -> QueryChunksResponse: index = self.cache.get(vector_db_id) if index is None: diff --git a/llama_stack/providers/inline/vector_io/milvus/__init__.py b/llama_stack/providers/inline/vector_io/milvus/__init__.py index d88a3b005..fe3a1f7f9 100644 --- a/llama_stack/providers/inline/vector_io/milvus/__init__.py +++ b/llama_stack/providers/inline/vector_io/milvus/__init__.py @@ -4,14 +4,14 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict +from typing import Any from llama_stack.providers.datatypes import Api from .config import MilvusVectorIOConfig -async def get_provider_impl(config: MilvusVectorIOConfig, deps: Dict[Api, Any]): +async def get_provider_impl(config: MilvusVectorIOConfig, deps: dict[Api, Any]): from llama_stack.providers.remote.vector_io.milvus.milvus import MilvusVectorIOAdapter impl = MilvusVectorIOAdapter(config, deps[Api.inference]) diff --git a/llama_stack/providers/inline/vector_io/milvus/config.py b/llama_stack/providers/inline/vector_io/milvus/config.py index 0e11d8c7c..eb22b5276 100644 --- a/llama_stack/providers/inline/vector_io/milvus/config.py +++ b/llama_stack/providers/inline/vector_io/milvus/config.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict +from typing import Any from pydantic import BaseModel @@ -16,5 +16,5 @@ class MilvusVectorIOConfig(BaseModel): db_path: str @classmethod - def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> Dict[str, Any]: + def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: return {"db_path": "${env.MILVUS_DB_PATH}"} diff --git a/llama_stack/providers/inline/vector_io/qdrant/__init__.py b/llama_stack/providers/inline/vector_io/qdrant/__init__.py index 8f0b91c61..ee33b3797 100644 --- a/llama_stack/providers/inline/vector_io/qdrant/__init__.py +++ b/llama_stack/providers/inline/vector_io/qdrant/__init__.py @@ -4,14 +4,12 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Dict - from llama_stack.providers.datatypes import Api, ProviderSpec from .config import QdrantVectorIOConfig -async def get_adapter_impl(config: QdrantVectorIOConfig, deps: Dict[Api, ProviderSpec]): +async def get_adapter_impl(config: QdrantVectorIOConfig, deps: dict[Api, ProviderSpec]): from llama_stack.providers.remote.vector_io.qdrant.qdrant import QdrantVectorIOAdapter impl = QdrantVectorIOAdapter(config, deps[Api.inference]) diff --git a/llama_stack/providers/inline/vector_io/qdrant/config.py b/llama_stack/providers/inline/vector_io/qdrant/config.py index 282e951b0..283724b41 100644 --- a/llama_stack/providers/inline/vector_io/qdrant/config.py +++ b/llama_stack/providers/inline/vector_io/qdrant/config.py @@ -5,7 +5,7 @@ # the root directory of this source tree. 
-from typing import Any, Dict +from typing import Any from pydantic import BaseModel @@ -17,7 +17,7 @@ class QdrantVectorIOConfig(BaseModel): path: str @classmethod - def sample_run_config(cls, __distro_dir__: str) -> Dict[str, Any]: + def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { "path": "${env.QDRANT_PATH:~/.llama/" + __distro_dir__ + "}/" + "qdrant.db", } diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py b/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py index 2380eb0ef..6db176eda 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +++ b/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py @@ -4,14 +4,14 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict +from typing import Any from llama_stack.providers.datatypes import Api from .config import SQLiteVectorIOConfig -async def get_provider_impl(config: SQLiteVectorIOConfig, deps: Dict[Api, Any]): +async def get_provider_impl(config: SQLiteVectorIOConfig, deps: dict[Api, Any]): from .sqlite_vec import SQLiteVecVectorIOAdapter assert isinstance(config, SQLiteVectorIOConfig), f"Unexpected config type: {type(config)}" diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/config.py b/llama_stack/providers/inline/vector_io/sqlite_vec/config.py index 906c19689..cb806cb39 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/config.py +++ b/llama_stack/providers/inline/vector_io/sqlite_vec/config.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict +from typing import Any from pydantic import BaseModel @@ -13,7 +13,7 @@ class SQLiteVectorIOConfig(BaseModel): db_path: str @classmethod - def sample_run_config(cls, __distro_dir__: str) -> Dict[str, Any]: + def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { "db_path": "${env.SQLITE_STORE_DIR:" + __distro_dir__ + "}/" + "sqlite_vec.db", } diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py index 5f7671138..fc1a8ddb0 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +++ b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py @@ -10,7 +10,7 @@ import logging import sqlite3 import struct import uuid -from typing import Any, Dict, List, Optional +from typing import Any import numpy as np import sqlite_vec @@ -24,8 +24,13 @@ from llama_stack.providers.utils.memory.vector_store import EmbeddingIndex, Vect logger = logging.getLogger(__name__) +# Specifying search mode is dependent on the VectorIO provider. +VECTOR_SEARCH = "vector" +KEYWORD_SEARCH = "keyword" +SEARCH_MODES = {VECTOR_SEARCH, KEYWORD_SEARCH} -def serialize_vector(vector: List[float]) -> bytes: + +def serialize_vector(vector: list[float]) -> bytes: """Serialize a list of floats into a compact binary representation.""" return struct.pack(f"{len(vector)}f", *vector) @@ -45,6 +50,7 @@ class SQLiteVecIndex(EmbeddingIndex): Two tables are used: - A metadata table (chunks_{bank_id}) that holds the chunk JSON. - A virtual table (vec_chunks_{bank_id}) that holds the serialized vector. + - An FTS5 table (fts_chunks_{bank_id}) for full-text keyword search. 
""" def __init__(self, dimension: int, db_path: str, bank_id: str): @@ -53,6 +59,7 @@ class SQLiteVecIndex(EmbeddingIndex): self.bank_id = bank_id self.metadata_table = f"chunks_{bank_id}".replace("-", "_") self.vector_table = f"vec_chunks_{bank_id}".replace("-", "_") + self.fts_table = f"fts_chunks_{bank_id}".replace("-", "_") @classmethod async def create(cls, dimension: int, db_path: str, bank_id: str): @@ -78,6 +85,14 @@ class SQLiteVecIndex(EmbeddingIndex): USING vec0(embedding FLOAT[{self.dimension}], id TEXT); """) connection.commit() + # FTS5 table (for keyword search) - creating both the tables by default. Will use the relevant one + # based on query. Implementation of the change on client side will allow passing the search_mode option + # during initialization to make it easier to create the table that is required. + cur.execute(f""" + CREATE VIRTUAL TABLE IF NOT EXISTS {self.fts_table} + USING fts5(id, content); + """) + connection.commit() finally: cur.close() connection.close() @@ -91,6 +106,7 @@ class SQLiteVecIndex(EmbeddingIndex): try: cur.execute(f"DROP TABLE IF EXISTS {self.metadata_table};") cur.execute(f"DROP TABLE IF EXISTS {self.vector_table};") + cur.execute(f"DROP TABLE IF EXISTS {self.fts_table};") connection.commit() finally: cur.close() @@ -98,12 +114,13 @@ class SQLiteVecIndex(EmbeddingIndex): await asyncio.to_thread(_drop_tables) - async def add_chunks(self, chunks: List[Chunk], embeddings: NDArray, batch_size: int = 500): + async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray, batch_size: int = 500): """ Add new chunks along with their embeddings using batch inserts. For each chunk, we insert its JSON into the metadata table and then insert its embedding (serialized to raw bytes) into the virtual table using the assigned rowid. If any insert fails, the transaction is rolled back to maintain consistency. + Also inserts chunk content into FTS table for keyword search support. 
""" assert all(isinstance(chunk.content, str) for chunk in chunks), "SQLiteVecIndex only supports text chunks" @@ -112,18 +129,16 @@ class SQLiteVecIndex(EmbeddingIndex): cur = connection.cursor() try: - # Start transaction a single transcation for all batches cur.execute("BEGIN TRANSACTION") for i in range(0, len(chunks), batch_size): batch_chunks = chunks[i : i + batch_size] batch_embeddings = embeddings[i : i + batch_size] - # Prepare metadata inserts + + # Insert metadata metadata_data = [ (generate_chunk_id(chunk.metadata["document_id"], chunk.content), chunk.model_dump_json()) for chunk in batch_chunks - if isinstance(chunk.content, str) ] - # Insert metadata (ON CONFLICT to avoid duplicates) cur.executemany( f""" INSERT INTO {self.metadata_table} (id, chunk) @@ -132,21 +147,43 @@ class SQLiteVecIndex(EmbeddingIndex): """, metadata_data, ) - # Prepare embeddings inserts + + # Insert vector embeddings embedding_data = [ ( - generate_chunk_id(chunk.metadata["document_id"], chunk.content), - serialize_vector(emb.tolist()), + ( + generate_chunk_id(chunk.metadata["document_id"], chunk.content), + serialize_vector(emb.tolist()), + ) ) for chunk, emb in zip(batch_chunks, batch_embeddings, strict=True) - if isinstance(chunk.content, str) ] - # Insert embeddings in batch - cur.executemany(f"INSERT INTO {self.vector_table} (id, embedding) VALUES (?, ?);", embedding_data) + cur.executemany( + f"INSERT INTO {self.vector_table} (id, embedding) VALUES (?, ?);", + embedding_data, + ) + + # Insert FTS content + fts_data = [ + (generate_chunk_id(chunk.metadata["document_id"], chunk.content), chunk.content) + for chunk in batch_chunks + ] + # DELETE existing entries with same IDs (FTS5 doesn't support ON CONFLICT) + cur.executemany( + f"DELETE FROM {self.fts_table} WHERE id = ?;", + [(row[0],) for row in fts_data], + ) + + # INSERT new entries + cur.executemany( + f"INSERT INTO {self.fts_table} (id, content) VALUES (?, ?);", + fts_data, + ) + connection.commit() except sqlite3.Error as e: - connection.rollback() # Rollback on failure + connection.rollback() logger.error(f"Error inserting into {self.vector_table}: {e}") raise @@ -154,22 +191,25 @@ class SQLiteVecIndex(EmbeddingIndex): cur.close() connection.close() - # Process all batches in a single thread + # Run batch insertion in a background thread await asyncio.to_thread(_execute_all_batch_inserts) - async def query(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse: + async def query_vector( + self, + embedding: NDArray, + k: int, + score_threshold: float, + ) -> QueryChunksResponse: """ - Query for the k most similar chunks. We convert the query embedding to a blob and run a SQL query - against the virtual table. The SQL joins the metadata table to recover the chunk JSON. + Performs vector-based search using a virtual table for vector similarity. 
""" - emb_list = embedding.tolist() if isinstance(embedding, np.ndarray) else list(embedding) - emb_blob = serialize_vector(emb_list) def _execute_query(): connection = _create_sqlite_connection(self.db_path) cur = connection.cursor() - try: + emb_list = embedding.tolist() if isinstance(embedding, np.ndarray) else list(embedding) + emb_blob = serialize_vector(emb_list) query_sql = f""" SELECT m.id, m.chunk, v.distance FROM {self.vector_table} AS v @@ -184,17 +224,66 @@ class SQLiteVecIndex(EmbeddingIndex): connection.close() rows = await asyncio.to_thread(_execute_query) - chunks, scores = [], [] - for _id, chunk_json, distance in rows: + for row in rows: + _id, chunk_json, distance = row + score = 1.0 / distance if distance != 0 else float("inf") + if score < score_threshold: + continue + try: + chunk = Chunk.model_validate_json(chunk_json) + except Exception as e: + logger.error(f"Error parsing chunk JSON for id {_id}: {e}") + continue + chunks.append(chunk) + scores.append(score) + return QueryChunksResponse(chunks=chunks, scores=scores) + + async def query_keyword( + self, + query_string: str, + k: int, + score_threshold: float, + ) -> QueryChunksResponse: + """ + Performs keyword-based search using SQLite FTS5 for relevance-ranked full-text search. + """ + if query_string is None: + raise ValueError("query_string is required for keyword search.") + + def _execute_query(): + connection = _create_sqlite_connection(self.db_path) + cur = connection.cursor() + try: + query_sql = f""" + SELECT DISTINCT m.id, m.chunk, bm25({self.fts_table}) AS score + FROM {self.fts_table} AS f + JOIN {self.metadata_table} AS m ON m.id = f.id + WHERE f.content MATCH ? + ORDER BY score ASC + LIMIT ?; + """ + cur.execute(query_sql, (query_string, k)) + return cur.fetchall() + finally: + cur.close() + connection.close() + + rows = await asyncio.to_thread(_execute_query) + chunks, scores = [], [] + for row in rows: + _id, chunk_json, score = row + # BM25 scores returned by sqlite-vec are NEGATED (i.e., more relevant = more negative). + # This design is intentional to simplify sorting by ascending score. 
+ # Reference: https://alexgarcia.xyz/blog/2024/sqlite-vec-hybrid-search/index.html + if score > -score_threshold: + continue try: chunk = Chunk.model_validate_json(chunk_json) except Exception as e: logger.error(f"Error parsing chunk JSON for id {_id}: {e}") continue chunks.append(chunk) - # Mimic the Faiss scoring: score = 1/distance (avoid division by zero) - score = 1.0 / distance if distance != 0 else float("inf") scores.append(score) return QueryChunksResponse(chunks=chunks, scores=scores) @@ -209,7 +298,7 @@ class SQLiteVecVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate): def __init__(self, config, inference_api: Inference) -> None: self.config = config self.inference_api = inference_api - self.cache: Dict[str, VectorDBWithIndex] = {} + self.cache: dict[str, VectorDBWithIndex] = {} async def initialize(self) -> None: def _setup_connection(): @@ -264,7 +353,7 @@ class SQLiteVecVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate): index = await SQLiteVecIndex.create(vector_db.embedding_dimension, self.config.db_path, vector_db.identifier) self.cache[vector_db.identifier] = VectorDBWithIndex(vector_db, index, self.inference_api) - async def list_vector_dbs(self) -> List[VectorDB]: + async def list_vector_dbs(self) -> list[VectorDB]: return [v.vector_db for v in self.cache.values()] async def unregister_vector_db(self, vector_db_id: str) -> None: @@ -286,7 +375,7 @@ class SQLiteVecVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate): await asyncio.to_thread(_delete_vector_db_from_registry) - async def insert_chunks(self, vector_db_id: str, chunks: List[Chunk], ttl_seconds: Optional[int] = None) -> None: + async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: if vector_db_id not in self.cache: raise ValueError(f"Vector DB {vector_db_id} not found. Found: {list(self.cache.keys())}") # The VectorDBWithIndex helper is expected to compute embeddings via the inference_api @@ -294,7 +383,7 @@ class SQLiteVecVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate): await self.cache[vector_db_id].insert_chunks(chunks) async def query_chunks( - self, vector_db_id: str, query: Any, params: Optional[Dict[str, Any]] = None + self, vector_db_id: str, query: Any, params: dict[str, Any] | None = None ) -> QueryChunksResponse: if vector_db_id not in self.cache: raise ValueError(f"Vector DB {vector_db_id} not found") @@ -303,5 +392,5 @@ class SQLiteVecVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate): def generate_chunk_id(document_id: str, chunk_text: str) -> str: """Generate a unique chunk ID using a hash of document ID and chunk text.""" - hash_input = f"{document_id}:{chunk_text}".encode("utf-8") + hash_input = f"{document_id}:{chunk_text}".encode() return str(uuid.UUID(hashlib.md5(hash_input).hexdigest())) diff --git a/llama_stack/providers/registry/agents.py b/llama_stack/providers/registry/agents.py index 3ed59304d..e0801a8d1 100644 --- a/llama_stack/providers/registry/agents.py +++ b/llama_stack/providers/registry/agents.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from typing import List from llama_stack.providers.datatypes import ( Api, @@ -14,7 +13,7 @@ from llama_stack.providers.datatypes import ( from llama_stack.providers.utils.kvstore import kvstore_dependencies -def available_providers() -> List[ProviderSpec]: +def available_providers() -> list[ProviderSpec]: return [ InlineProviderSpec( api=Api.agents, diff --git a/llama_stack/providers/registry/datasetio.py b/llama_stack/providers/registry/datasetio.py index f83dcbc60..152cc9cb9 100644 --- a/llama_stack/providers/registry/datasetio.py +++ b/llama_stack/providers/registry/datasetio.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import List from llama_stack.providers.datatypes import ( AdapterSpec, @@ -15,7 +14,7 @@ from llama_stack.providers.datatypes import ( ) -def available_providers() -> List[ProviderSpec]: +def available_providers() -> list[ProviderSpec]: return [ InlineProviderSpec( api=Api.datasetio, @@ -36,4 +35,15 @@ def available_providers() -> List[ProviderSpec]: config_class="llama_stack.providers.remote.datasetio.huggingface.HuggingfaceDatasetIOConfig", ), ), + remote_provider_spec( + api=Api.datasetio, + adapter=AdapterSpec( + adapter_type="nvidia", + pip_packages=[ + "datasets", + ], + module="llama_stack.providers.remote.datasetio.nvidia", + config_class="llama_stack.providers.remote.datasetio.nvidia.NvidiaDatasetIOConfig", + ), + ), ] diff --git a/llama_stack/providers/registry/eval.py b/llama_stack/providers/registry/eval.py index f3e42c531..c9c29bbe0 100644 --- a/llama_stack/providers/registry/eval.py +++ b/llama_stack/providers/registry/eval.py @@ -4,12 +4,11 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import List -from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec +from llama_stack.providers.datatypes import AdapterSpec, Api, InlineProviderSpec, ProviderSpec, remote_provider_spec -def available_providers() -> List[ProviderSpec]: +def available_providers() -> list[ProviderSpec]: return [ InlineProviderSpec( api=Api.eval, @@ -25,4 +24,22 @@ def available_providers() -> List[ProviderSpec]: Api.agents, ], ), + remote_provider_spec( + api=Api.eval, + adapter=AdapterSpec( + adapter_type="nvidia", + pip_packages=[ + "requests", + ], + module="llama_stack.providers.remote.eval.nvidia", + config_class="llama_stack.providers.remote.eval.nvidia.NVIDIAEvalConfig", + ), + api_dependencies=[ + Api.datasetio, + Api.datasets, + Api.scoring, + Api.inference, + Api.agents, + ], + ), ] diff --git a/llama_stack/providers/registry/inference.py b/llama_stack/providers/registry/inference.py index 3c54cabcf..7b49ef09b 100644 --- a/llama_stack/providers/registry/inference.py +++ b/llama_stack/providers/registry/inference.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from typing import List from llama_stack.providers.datatypes import ( AdapterSpec, @@ -29,7 +28,7 @@ META_REFERENCE_DEPS = [ ] -def available_providers() -> List[ProviderSpec]: +def available_providers() -> list[ProviderSpec]: return [ InlineProviderSpec( api=Api.inference, @@ -227,6 +226,16 @@ def available_providers() -> List[ProviderSpec]: provider_data_validator="llama_stack.providers.remote.inference.fireworks_openai_compat.config.FireworksProviderDataValidator", ), ), + remote_provider_spec( + api=Api.inference, + adapter=AdapterSpec( + adapter_type="llama-openai-compat", + pip_packages=["litellm"], + module="llama_stack.providers.remote.inference.llama_openai_compat", + config_class="llama_stack.providers.remote.inference.llama_openai_compat.config.LlamaCompatConfig", + provider_data_validator="llama_stack.providers.remote.inference.llama_openai_compat.config.LlamaProviderDataValidator", + ), + ), remote_provider_spec( api=Api.inference, adapter=AdapterSpec( @@ -271,11 +280,10 @@ def available_providers() -> List[ProviderSpec]: api=Api.inference, adapter=AdapterSpec( adapter_type="sambanova", - pip_packages=[ - "openai", - ], + pip_packages=["litellm"], module="llama_stack.providers.remote.inference.sambanova", config_class="llama_stack.providers.remote.inference.sambanova.SambaNovaImplConfig", + provider_data_validator="llama_stack.providers.remote.inference.sambanova.config.SambaNovaProviderDataValidator", ), ), remote_provider_spec( @@ -288,4 +296,14 @@ def available_providers() -> List[ProviderSpec]: provider_data_validator="llama_stack.providers.remote.inference.passthrough.PassthroughProviderDataValidator", ), ), + remote_provider_spec( + api=Api.inference, + adapter=AdapterSpec( + adapter_type="watsonx", + pip_packages=["ibm_watson_machine_learning"], + module="llama_stack.providers.remote.inference.watsonx", + config_class="llama_stack.providers.remote.inference.watsonx.WatsonXConfig", + provider_data_validator="llama_stack.providers.remote.inference.watsonx.WatsonXProviderDataValidator", + ), + ), ] diff --git a/llama_stack/providers/registry/post_training.py b/llama_stack/providers/registry/post_training.py index 4d10fcf3b..d752b8819 100644 --- a/llama_stack/providers/registry/post_training.py +++ b/llama_stack/providers/registry/post_training.py @@ -4,12 +4,11 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from typing import List from llama_stack.providers.datatypes import AdapterSpec, Api, InlineProviderSpec, ProviderSpec, remote_provider_spec -def available_providers() -> List[ProviderSpec]: +def available_providers() -> list[ProviderSpec]: return [ InlineProviderSpec( api=Api.post_training, @@ -22,6 +21,17 @@ def available_providers() -> List[ProviderSpec]: Api.datasets, ], ), + InlineProviderSpec( + api=Api.post_training, + provider_type="inline::huggingface", + pip_packages=["torch", "trl", "transformers", "peft", "datasets"], + module="llama_stack.providers.inline.post_training.huggingface", + config_class="llama_stack.providers.inline.post_training.huggingface.HuggingFacePostTrainingConfig", + api_dependencies=[ + Api.datasetio, + Api.datasets, + ], + ), remote_provider_spec( api=Api.post_training, adapter=AdapterSpec( diff --git a/llama_stack/providers/registry/safety.py b/llama_stack/providers/registry/safety.py index 54dc51034..e0a04be48 100644 --- a/llama_stack/providers/registry/safety.py +++ b/llama_stack/providers/registry/safety.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import List from llama_stack.providers.datatypes import ( AdapterSpec, @@ -15,7 +14,7 @@ from llama_stack.providers.datatypes import ( ) -def available_providers() -> List[ProviderSpec]: +def available_providers() -> list[ProviderSpec]: return [ InlineProviderSpec( api=Api.safety, @@ -64,4 +63,14 @@ def available_providers() -> List[ProviderSpec]: config_class="llama_stack.providers.remote.safety.nvidia.NVIDIASafetyConfig", ), ), + remote_provider_spec( + api=Api.safety, + adapter=AdapterSpec( + adapter_type="sambanova", + pip_packages=["litellm"], + module="llama_stack.providers.remote.safety.sambanova", + config_class="llama_stack.providers.remote.safety.sambanova.SambaNovaSafetyConfig", + provider_data_validator="llama_stack.providers.remote.safety.sambanova.config.SambaNovaProviderDataValidator", + ), + ), ] diff --git a/llama_stack/providers/registry/scoring.py b/llama_stack/providers/registry/scoring.py index ca09be984..7980d6a13 100644 --- a/llama_stack/providers/registry/scoring.py +++ b/llama_stack/providers/registry/scoring.py @@ -4,12 +4,11 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import List from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec -def available_providers() -> List[ProviderSpec]: +def available_providers() -> list[ProviderSpec]: return [ InlineProviderSpec( api=Api.scoring, diff --git a/llama_stack/providers/registry/telemetry.py b/llama_stack/providers/registry/telemetry.py index fc249f3e2..14da06126 100644 --- a/llama_stack/providers/registry/telemetry.py +++ b/llama_stack/providers/registry/telemetry.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from typing import List from llama_stack.providers.datatypes import ( Api, @@ -13,7 +12,7 @@ from llama_stack.providers.datatypes import ( ) -def available_providers() -> List[ProviderSpec]: +def available_providers() -> list[ProviderSpec]: return [ InlineProviderSpec( api=Api.telemetry, diff --git a/llama_stack/providers/registry/tool_runtime.py b/llama_stack/providers/registry/tool_runtime.py index 95ea2dcf9..277914df2 100644 --- a/llama_stack/providers/registry/tool_runtime.py +++ b/llama_stack/providers/registry/tool_runtime.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import List from llama_stack.providers.datatypes import ( AdapterSpec, @@ -15,7 +14,7 @@ from llama_stack.providers.datatypes import ( ) -def available_providers() -> List[ProviderSpec]: +def available_providers() -> list[ProviderSpec]: return [ InlineProviderSpec( api=Api.tool_runtime, @@ -36,13 +35,6 @@ def available_providers() -> List[ProviderSpec]: config_class="llama_stack.providers.inline.tool_runtime.rag.config.RagToolRuntimeConfig", api_dependencies=[Api.vector_io, Api.inference], ), - InlineProviderSpec( - api=Api.tool_runtime, - provider_type="inline::code-interpreter", - pip_packages=[], - module="llama_stack.providers.inline.tool_runtime.code_interpreter", - config_class="llama_stack.providers.inline.tool_runtime.code_interpreter.config.CodeInterpreterToolConfig", - ), remote_provider_spec( api=Api.tool_runtime, adapter=AdapterSpec( @@ -88,8 +80,9 @@ def available_providers() -> List[ProviderSpec]: adapter=AdapterSpec( adapter_type="model-context-protocol", module="llama_stack.providers.remote.tool_runtime.model_context_protocol", - config_class="llama_stack.providers.remote.tool_runtime.model_context_protocol.config.ModelContextProtocolConfig", + config_class="llama_stack.providers.remote.tool_runtime.model_context_protocol.config.MCPProviderConfig", pip_packages=["mcp"], + provider_data_validator="llama_stack.providers.remote.tool_runtime.model_context_protocol.config.MCPProviderDataValidator", ), ), ] diff --git a/llama_stack/providers/registry/vector_io.py b/llama_stack/providers/registry/vector_io.py index 93031763d..d888c8420 100644 --- a/llama_stack/providers/registry/vector_io.py +++ b/llama_stack/providers/registry/vector_io.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import List from llama_stack.providers.datatypes import ( AdapterSpec, @@ -15,7 +14,7 @@ from llama_stack.providers.datatypes import ( ) -def available_providers() -> List[ProviderSpec]: +def available_providers() -> list[ProviderSpec]: return [ InlineProviderSpec( api=Api.vector_io, diff --git a/llama_stack/providers/remote/datasetio/huggingface/config.py b/llama_stack/providers/remote/datasetio/huggingface/config.py index c06996b6f..38f933728 100644 --- a/llama_stack/providers/remote/datasetio/huggingface/config.py +++ b/llama_stack/providers/remote/datasetio/huggingface/config.py @@ -3,7 +3,7 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from typing import Any, Dict +from typing import Any from pydantic import BaseModel @@ -17,7 +17,7 @@ class HuggingfaceDatasetIOConfig(BaseModel): kvstore: KVStoreConfig @classmethod - def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> Dict[str, Any]: + def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: return { "kvstore": SqliteKVStoreConfig.sample_run_config( __distro_dir__=__distro_dir__, diff --git a/llama_stack/providers/remote/datasetio/huggingface/huggingface.py b/llama_stack/providers/remote/datasetio/huggingface/huggingface.py index 7a17e5e42..fafd1d8ff 100644 --- a/llama_stack/providers/remote/datasetio/huggingface/huggingface.py +++ b/llama_stack/providers/remote/datasetio/huggingface/huggingface.py @@ -3,7 +3,7 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict, List, Optional +from typing import Any from urllib.parse import parse_qs, urlparse import datasets as hf_datasets @@ -12,8 +12,8 @@ from llama_stack.apis.common.responses import PaginatedResponse from llama_stack.apis.datasetio import DatasetIO from llama_stack.apis.datasets import Dataset from llama_stack.providers.datatypes import DatasetsProtocolPrivate -from llama_stack.providers.utils.datasetio.pagination import paginate_records from llama_stack.providers.utils.kvstore import kvstore_impl +from llama_stack.providers.utils.pagination import paginate_records from .config import HuggingfaceDatasetIOConfig @@ -42,7 +42,7 @@ class HuggingfaceDatasetIOImpl(DatasetIO, DatasetsProtocolPrivate): # Load existing datasets from kvstore start_key = DATASETS_PREFIX end_key = f"{DATASETS_PREFIX}\xff" - stored_datasets = await self.kvstore.range(start_key, end_key) + stored_datasets = await self.kvstore.values_in_range(start_key, end_key) for dataset in stored_datasets: dataset = Dataset.model_validate_json(dataset) @@ -70,8 +70,8 @@ class HuggingfaceDatasetIOImpl(DatasetIO, DatasetsProtocolPrivate): async def iterrows( self, dataset_id: str, - start_index: Optional[int] = None, - limit: Optional[int] = None, + start_index: int | None = None, + limit: int | None = None, ) -> PaginatedResponse: dataset_def = self.dataset_infos[dataset_id] path, params = parse_hf_params(dataset_def) @@ -80,7 +80,7 @@ class HuggingfaceDatasetIOImpl(DatasetIO, DatasetsProtocolPrivate): records = [loaded_dataset[i] for i in range(len(loaded_dataset))] return paginate_records(records, start_index, limit) - async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None: + async def append_rows(self, dataset_id: str, rows: list[dict[str, Any]]) -> None: dataset_def = self.dataset_infos[dataset_id] path, params = parse_hf_params(dataset_def) loaded_dataset = hf_datasets.load_dataset(path, **params) diff --git a/llama_stack/providers/remote/datasetio/nvidia/README.md b/llama_stack/providers/remote/datasetio/nvidia/README.md new file mode 100644 index 000000000..1d3d15132 --- /dev/null +++ b/llama_stack/providers/remote/datasetio/nvidia/README.md @@ -0,0 +1,74 @@ +# NVIDIA DatasetIO Provider for LlamaStack + +This provider enables dataset management using NVIDIA's NeMo Customizer service. 
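+
+Under the hood, dataset registration maps onto the NeMo datasets API (see `datasetio.py` in this provider). As a rough sketch only, assuming the default `NVIDIA_DATASETS_URL` of `http://nemo.test` and the sample values from the usage examples below, `client.datasets.register` issues a request equivalent to:
+
+```python
+# Illustrative only: the provider itself uses aiohttp internally; requests is
+# used here for brevity. Values mirror the usage examples below.
+import requests
+
+response = requests.post(
+    "http://nemo.test/v1/datasets",  # {NVIDIA_DATASETS_URL}/v1/datasets
+    json={
+        "name": "my-training-dataset",
+        "namespace": "default",  # NVIDIA_DATASET_NAMESPACE
+        "files_url": "hf://datasets/default/sample-dataset",
+        "project": "test-project",  # NVIDIA_PROJECT_ID
+        "format": "json",  # optional, taken from the dataset metadata
+        "description": "Dataset for LLM fine-tuning",  # optional
+    },
+    timeout=30,
+)
+response.raise_for_status()
+```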
+ +## Features + +- Register datasets for fine-tuning LLMs +- Unregister datasets + +## Getting Started + +### Prerequisites + +- LlamaStack with NVIDIA configuration +- Access to Hosted NVIDIA NeMo Microservice +- API key for authentication with the NVIDIA service + +### Setup + +Build the NVIDIA environment: + +```bash +llama stack build --template nvidia --image-type conda +``` + +### Basic Usage using the LlamaStack Python Client + +#### Initialize the client + +```python +import os + +os.environ["NVIDIA_API_KEY"] = "your-api-key" +os.environ["NVIDIA_CUSTOMIZER_URL"] = "http://nemo.test" +os.environ["NVIDIA_USER_ID"] = "llama-stack-user" +os.environ["NVIDIA_DATASET_NAMESPACE"] = "default" +os.environ["NVIDIA_PROJECT_ID"] = "test-project" +from llama_stack.distribution.library_client import LlamaStackAsLibraryClient + +client = LlamaStackAsLibraryClient("nvidia") +client.initialize() +``` + +#### Register a dataset + +```python +client.datasets.register( + purpose="post-training/messages", + dataset_id="my-training-dataset", + source={"type": "uri", "uri": "hf://datasets/default/sample-dataset"}, + metadata={ + "format": "json", + "description": "Dataset for LLM fine-tuning", + "provider": "nvidia", + }, +) +``` + +#### Get a list of all registered datasets + +```python +datasets = client.datasets.list() +for dataset in datasets: + print(f"Dataset ID: {dataset.identifier}") + print(f"Description: {dataset.metadata.get('description', '')}") + print(f"Source: {dataset.source.uri}") + print("---") +``` + +#### Unregister a dataset + +```python +client.datasets.unregister(dataset_id="my-training-dataset") +``` diff --git a/llama_stack/providers/remote/datasetio/nvidia/__init__.py b/llama_stack/providers/remote/datasetio/nvidia/__init__.py new file mode 100644 index 000000000..418daec8d --- /dev/null +++ b/llama_stack/providers/remote/datasetio/nvidia/__init__.py @@ -0,0 +1,23 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from .config import NvidiaDatasetIOConfig + + +async def get_adapter_impl( + config: NvidiaDatasetIOConfig, + _deps, +): + from .datasetio import NvidiaDatasetIOAdapter + + if not isinstance(config, NvidiaDatasetIOConfig): + raise RuntimeError(f"Unexpected config type: {type(config)}") + + impl = NvidiaDatasetIOAdapter(config) + return impl + + +__all__ = ["get_adapter_impl", "NvidiaDatasetIOAdapter"] diff --git a/llama_stack/providers/remote/datasetio/nvidia/config.py b/llama_stack/providers/remote/datasetio/nvidia/config.py new file mode 100644 index 000000000..e616ce25c --- /dev/null +++ b/llama_stack/providers/remote/datasetio/nvidia/config.py @@ -0,0 +1,61 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +import os +import warnings +from typing import Any + +from pydantic import BaseModel, Field + + +class NvidiaDatasetIOConfig(BaseModel): + """Configuration for NVIDIA DatasetIO implementation.""" + + api_key: str | None = Field( + default_factory=lambda: os.getenv("NVIDIA_API_KEY"), + description="The NVIDIA API key.", + ) + + dataset_namespace: str | None = Field( + default_factory=lambda: os.getenv("NVIDIA_DATASET_NAMESPACE", "default"), + description="The NVIDIA dataset namespace.", + ) + + project_id: str | None = Field( + default_factory=lambda: os.getenv("NVIDIA_PROJECT_ID", "test-project"), + description="The NVIDIA project ID.", + ) + + datasets_url: str = Field( + default_factory=lambda: os.getenv("NVIDIA_DATASETS_URL", "http://nemo.test"), + description="Base URL for the NeMo Dataset API", + ) + + # warning for default values + def __post_init__(self): + default_values = [] + if os.getenv("NVIDIA_PROJECT_ID") is None: + default_values.append("project_id='test-project'") + if os.getenv("NVIDIA_DATASET_NAMESPACE") is None: + default_values.append("dataset_namespace='default'") + if os.getenv("NVIDIA_DATASETS_URL") is None: + default_values.append("datasets_url='http://nemo.test'") + + if default_values: + warnings.warn( + f"Using default values: {', '.join(default_values)}. \ + Please set the environment variables to avoid this default behavior.", + stacklevel=2, + ) + + @classmethod + def sample_run_config(cls, **kwargs) -> dict[str, Any]: + return { + "api_key": "${env.NVIDIA_API_KEY:}", + "dataset_namespace": "${env.NVIDIA_DATASET_NAMESPACE:default}", + "project_id": "${env.NVIDIA_PROJECT_ID:test-project}", + "datasets_url": "${env.NVIDIA_DATASETS_URL:http://nemo.test}", + } diff --git a/llama_stack/providers/remote/datasetio/nvidia/datasetio.py b/llama_stack/providers/remote/datasetio/nvidia/datasetio.py new file mode 100644 index 000000000..6a9e2bb58 --- /dev/null +++ b/llama_stack/providers/remote/datasetio/nvidia/datasetio.py @@ -0,0 +1,112 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Any + +import aiohttp + +from llama_stack.apis.common.content_types import URL +from llama_stack.apis.common.responses import PaginatedResponse +from llama_stack.apis.common.type_system import ParamType +from llama_stack.apis.datasets import Dataset + +from .config import NvidiaDatasetIOConfig + + +class NvidiaDatasetIOAdapter: + """Nvidia NeMo DatasetIO API.""" + + def __init__(self, config: NvidiaDatasetIOConfig): + self.config = config + self.headers = {} + + async def _make_request( + self, + method: str, + path: str, + headers: dict[str, Any] | None = None, + params: dict[str, Any] | None = None, + json: dict[str, Any] | None = None, + **kwargs, + ) -> dict[str, Any]: + """Helper method to make HTTP requests to the Customizer API.""" + url = f"{self.config.datasets_url}{path}" + request_headers = self.headers.copy() + + if headers: + request_headers.update(headers) + + async with aiohttp.ClientSession(headers=request_headers) as session: + async with session.request(method, url, params=params, json=json, **kwargs) as response: + if response.status != 200: + error_data = await response.json() + raise Exception(f"API request failed: {error_data}") + return await response.json() + + async def register_dataset( + self, + dataset_def: Dataset, + ) -> Dataset: + """Register a new dataset. 
+
+        Args:
+            dataset_def [Dataset]: The dataset definition. Its identifier, source URI,
+                and metadata ("format", "description") are forwarded to the NeMo Dataset API.
+        Returns:
+            Dataset: The registered dataset definition.
+        """
+        # TODO: add warnings for unsupported params
+        request_body = {
+            "name": dataset_def.identifier,
+            "namespace": self.config.dataset_namespace,
+            "files_url": dataset_def.source.uri,
+            "project": self.config.project_id,
+        }
+        if dataset_def.metadata:
+            request_body["format"] = dataset_def.metadata.get("format")
+            request_body["description"] = dataset_def.metadata.get("description")
+        await self._make_request(
+            "POST",
+            "/v1/datasets",
+            json=request_body,
+        )
+        return dataset_def
+
+    async def update_dataset(
+        self,
+        dataset_id: str,
+        dataset_schema: dict[str, ParamType],
+        url: URL,
+        provider_dataset_id: str | None = None,
+        provider_id: str | None = None,
+        metadata: dict[str, Any] | None = None,
+    ) -> None:
+        raise NotImplementedError("Not implemented")
+
+    async def unregister_dataset(
+        self,
+        dataset_id: str,
+    ) -> None:
+        await self._make_request(
+            "DELETE",
+            f"/v1/datasets/{self.config.dataset_namespace}/{dataset_id}",
+            headers={"Accept": "application/json", "Content-Type": "application/json"},
+        )
+
+    async def iterrows(
+        self,
+        dataset_id: str,
+        start_index: int | None = None,
+        limit: int | None = None,
+    ) -> PaginatedResponse:
+        raise NotImplementedError("Not implemented")
+
+    async def append_rows(self, dataset_id: str, rows: list[dict[str, Any]]) -> None:
+        raise NotImplementedError("Not implemented")
diff --git a/llama_stack/templates/dev/__init__.py b/llama_stack/providers/remote/eval/__init__.py
similarity index 77%
rename from llama_stack/templates/dev/__init__.py
rename to llama_stack/providers/remote/eval/__init__.py
index cf966c2a6..756f351d8 100644
--- a/llama_stack/templates/dev/__init__.py
+++ b/llama_stack/providers/remote/eval/__init__.py
@@ -3,5 +3,3 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-
-from .dev import get_distribution_template  # noqa: F401
diff --git a/llama_stack/providers/remote/eval/nvidia/README.md b/llama_stack/providers/remote/eval/nvidia/README.md
new file mode 100644
index 000000000..cebc77920
--- /dev/null
+++ b/llama_stack/providers/remote/eval/nvidia/README.md
@@ -0,0 +1,134 @@
+# NVIDIA NeMo Evaluator Eval Provider
+
+
+## Overview
+
+In this first integration, benchmarks are mapped to Evaluation Configs in the NeMo Evaluator. The full evaluation config object is provided as part of the metadata. The `dataset_id` and `scoring_functions` are not used.
+
+Below are a few examples of how to register a benchmark (which in turn creates an evaluation config in NeMo Evaluator) and how to trigger an evaluation.
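+If you are driving this provider through the Llama Stack Python client instead of raw HTTP, the flow looks roughly like the sketch below. The `benchmarks.register` and `eval.run_eval` method names and argument shapes follow the client's benchmarks and eval APIs but may differ slightly between client versions, so treat this as illustrative; the raw API payloads for each step are shown in the sections that follow.
+
+```python
+from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
+
+client = LlamaStackAsLibraryClient("nvidia")
+client.initialize()
+
+# Register a benchmark; the metadata is passed through as the NeMo Evaluator
+# evaluation config (here, the pre-defined "mmlu" academic benchmark).
+client.benchmarks.register(
+    benchmark_id="mmlu",
+    dataset_id="",
+    scoring_functions=[],
+    metadata={"type": "mmlu"},
+)
+
+# Launch an evaluation job for the registered benchmark.
+job = client.eval.run_eval(
+    benchmark_id="mmlu",
+    benchmark_config={
+        "eval_candidate": {
+            "type": "model",
+            "model": "meta-llama/Llama3.1-8B-Instruct",
+            "sampling_params": {"max_tokens": 100, "temperature": 0.7},
+        },
+        "scoring_params": {},
+    },
+)
+print(f"Job {job.job_id} is {job.status}")
+```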
+ +### Example for register an academic benchmark + +``` +POST /eval/benchmarks +``` +```json +{ + "benchmark_id": "mmlu", + "dataset_id": "", + "scoring_functions": [], + "metadata": { + "type": "mmlu" + } +} +``` + +### Example for register a custom evaluation + +``` +POST /eval/benchmarks +``` +```json +{ + "benchmark_id": "my-custom-benchmark", + "dataset_id": "", + "scoring_functions": [], + "metadata": { + "type": "custom", + "params": { + "parallelism": 8 + }, + "tasks": { + "qa": { + "type": "completion", + "params": { + "template": { + "prompt": "{{prompt}}", + "max_tokens": 200 + } + }, + "dataset": { + "files_url": "hf://datasets/default/sample-basic-test/testing/testing.jsonl" + }, + "metrics": { + "bleu": { + "type": "bleu", + "params": { + "references": [ + "{{ideal_response}}" + ] + } + } + } + } + } + } +} +``` + +### Example for triggering a benchmark/custom evaluation + +``` +POST /eval/benchmarks/{benchmark_id}/jobs +``` +```json +{ + "benchmark_id": "my-custom-benchmark", + "benchmark_config": { + "eval_candidate": { + "type": "model", + "model": "meta-llama/Llama3.1-8B-Instruct", + "sampling_params": { + "max_tokens": 100, + "temperature": 0.7 + } + }, + "scoring_params": {} + } +} +``` + +Response example: +```json +{ + "job_id": "eval-1234", + "status": "in_progress" +} +``` + +### Example for getting the status of a job +``` +GET /eval/benchmarks/{benchmark_id}/jobs/{job_id} +``` + +Response example: +```json +{ + "job_id": "eval-1234", + "status": "in_progress" +} +``` + +### Example for cancelling a job +``` +POST /eval/benchmarks/{benchmark_id}/jobs/{job_id}/cancel +``` + +### Example for getting the results +``` +GET /eval/benchmarks/{benchmark_id}/results +``` +```json +{ + "generations": [], + "scores": { + "{benchmark_id}": { + "score_rows": [], + "aggregated_results": { + "tasks": {}, + "groups": {} + } + } + } +} +``` diff --git a/llama_stack/providers/remote/eval/nvidia/__init__.py b/llama_stack/providers/remote/eval/nvidia/__init__.py new file mode 100644 index 000000000..55e3754f3 --- /dev/null +++ b/llama_stack/providers/remote/eval/nvidia/__init__.py @@ -0,0 +1,31 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. +from typing import Any + +from llama_stack.distribution.datatypes import Api + +from .config import NVIDIAEvalConfig + + +async def get_adapter_impl( + config: NVIDIAEvalConfig, + deps: dict[Api, Any], +): + from .eval import NVIDIAEvalImpl + + impl = NVIDIAEvalImpl( + config, + deps[Api.datasetio], + deps[Api.datasets], + deps[Api.scoring], + deps[Api.inference], + deps[Api.agents], + ) + await impl.initialize() + return impl + + +__all__ = ["get_adapter_impl", "NVIDIAEvalImpl"] diff --git a/llama_stack/providers/remote/eval/nvidia/config.py b/llama_stack/providers/remote/eval/nvidia/config.py new file mode 100644 index 000000000..5c8f9ff76 --- /dev/null +++ b/llama_stack/providers/remote/eval/nvidia/config.py @@ -0,0 +1,29 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. +import os +from typing import Any + +from pydantic import BaseModel, Field + + +class NVIDIAEvalConfig(BaseModel): + """ + Configuration for the NVIDIA NeMo Evaluator microservice endpoint. 
+ + Attributes: + evaluator_url (str): A base url for accessing the NVIDIA evaluation endpoint, e.g. http://localhost:8000. + """ + + evaluator_url: str = Field( + default_factory=lambda: os.getenv("NVIDIA_EVALUATOR_URL", "http://0.0.0.0:7331"), + description="The url for accessing the evaluator service", + ) + + @classmethod + def sample_run_config(cls, **kwargs) -> dict[str, Any]: + return { + "evaluator_url": "${env.NVIDIA_EVALUATOR_URL:http://localhost:7331}", + } diff --git a/llama_stack/providers/remote/eval/nvidia/eval.py b/llama_stack/providers/remote/eval/nvidia/eval.py new file mode 100644 index 000000000..3572de0ef --- /dev/null +++ b/llama_stack/providers/remote/eval/nvidia/eval.py @@ -0,0 +1,154 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. +from typing import Any + +import requests + +from llama_stack.apis.agents import Agents +from llama_stack.apis.benchmarks import Benchmark +from llama_stack.apis.datasetio import DatasetIO +from llama_stack.apis.datasets import Datasets +from llama_stack.apis.inference import Inference +from llama_stack.apis.scoring import Scoring, ScoringResult +from llama_stack.providers.datatypes import BenchmarksProtocolPrivate +from llama_stack.providers.remote.inference.nvidia.models import MODEL_ENTRIES +from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper + +from .....apis.common.job_types import Job, JobStatus +from .....apis.eval.eval import BenchmarkConfig, Eval, EvaluateResponse +from .config import NVIDIAEvalConfig + +DEFAULT_NAMESPACE = "nvidia" + + +class NVIDIAEvalImpl( + Eval, + BenchmarksProtocolPrivate, + ModelRegistryHelper, +): + def __init__( + self, + config: NVIDIAEvalConfig, + datasetio_api: DatasetIO, + datasets_api: Datasets, + scoring_api: Scoring, + inference_api: Inference, + agents_api: Agents, + ) -> None: + self.config = config + self.datasetio_api = datasetio_api + self.datasets_api = datasets_api + self.scoring_api = scoring_api + self.inference_api = inference_api + self.agents_api = agents_api + + ModelRegistryHelper.__init__(self, model_entries=MODEL_ENTRIES) + + async def initialize(self) -> None: ... + + async def shutdown(self) -> None: ... 
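+
+    # NeMo Evaluator is accessed over plain REST; the helpers below wrap GET/POST
+    # calls against `self.config.evaluator_url` and raise on non-2xx responses.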
+ + async def _evaluator_get(self, path): + """Helper for making GET requests to the evaluator service.""" + response = requests.get(url=f"{self.config.evaluator_url}{path}") + response.raise_for_status() + return response.json() + + async def _evaluator_post(self, path, data): + """Helper for making POST requests to the evaluator service.""" + response = requests.post(url=f"{self.config.evaluator_url}{path}", json=data) + response.raise_for_status() + return response.json() + + async def register_benchmark(self, task_def: Benchmark) -> None: + """Register a benchmark as an evaluation configuration.""" + await self._evaluator_post( + "/v1/evaluation/configs", + { + "namespace": DEFAULT_NAMESPACE, + "name": task_def.benchmark_id, + # metadata is copied to request body as-is + **task_def.metadata, + }, + ) + + async def run_eval( + self, + benchmark_id: str, + benchmark_config: BenchmarkConfig, + ) -> Job: + """Run an evaluation job for a benchmark.""" + model = ( + benchmark_config.eval_candidate.model + if benchmark_config.eval_candidate.type == "model" + else benchmark_config.eval_candidate.config.model + ) + nvidia_model = self.get_provider_model_id(model) or model + + result = await self._evaluator_post( + "/v1/evaluation/jobs", + { + "config": f"{DEFAULT_NAMESPACE}/{benchmark_id}", + "target": {"type": "model", "model": nvidia_model}, + }, + ) + + return Job(job_id=result["id"], status=JobStatus.in_progress) + + async def evaluate_rows( + self, + benchmark_id: str, + input_rows: list[dict[str, Any]], + scoring_functions: list[str], + benchmark_config: BenchmarkConfig, + ) -> EvaluateResponse: + raise NotImplementedError() + + async def job_status(self, benchmark_id: str, job_id: str) -> Job: + """Get the status of an evaluation job. + + EvaluatorStatus: "created", "pending", "running", "cancelled", "cancelling", "failed", "completed". + JobStatus: "scheduled", "in_progress", "completed", "cancelled", "failed" + """ + result = await self._evaluator_get(f"/v1/evaluation/jobs/{job_id}") + result_status = result["status"] + + job_status = JobStatus.failed + if result_status in ["created", "pending"]: + job_status = JobStatus.scheduled + elif result_status in ["running"]: + job_status = JobStatus.in_progress + elif result_status in ["completed"]: + job_status = JobStatus.completed + elif result_status in ["cancelled"]: + job_status = JobStatus.cancelled + + return Job(job_id=job_id, status=job_status) + + async def job_cancel(self, benchmark_id: str, job_id: str) -> None: + """Cancel the evaluation job.""" + await self._evaluator_post(f"/v1/evaluation/jobs/{job_id}/cancel", {}) + + async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse: + """Returns the results of the evaluation job.""" + + job = await self.job_status(benchmark_id, job_id) + status = job.status + if not status or status != JobStatus.completed: + raise ValueError(f"Job {job_id} not completed. 
Status: {status.value}") + + result = await self._evaluator_get(f"/v1/evaluation/jobs/{job_id}/results") + + return EvaluateResponse( + # TODO: these are stored in detailed results on NeMo Evaluator side; can be added + generations=[], + scores={ + benchmark_id: ScoringResult( + score_rows=[], + aggregated_results=result, + ) + }, + ) diff --git a/llama_stack/providers/remote/inference/anthropic/__init__.py b/llama_stack/providers/remote/inference/anthropic/__init__.py index 3075f856e..8b420a5a0 100644 --- a/llama_stack/providers/remote/inference/anthropic/__init__.py +++ b/llama_stack/providers/remote/inference/anthropic/__init__.py @@ -4,15 +4,13 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Optional - from pydantic import BaseModel from .config import AnthropicConfig class AnthropicProviderDataValidator(BaseModel): - anthropic_api_key: Optional[str] = None + anthropic_api_key: str | None = None async def get_adapter_impl(config: AnthropicConfig, _deps): diff --git a/llama_stack/providers/remote/inference/anthropic/config.py b/llama_stack/providers/remote/inference/anthropic/config.py index 0e9469602..10da0025e 100644 --- a/llama_stack/providers/remote/inference/anthropic/config.py +++ b/llama_stack/providers/remote/inference/anthropic/config.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict, Optional +from typing import Any from pydantic import BaseModel, Field @@ -12,7 +12,7 @@ from llama_stack.schema_utils import json_schema_type class AnthropicProviderDataValidator(BaseModel): - anthropic_api_key: Optional[str] = Field( + anthropic_api_key: str | None = Field( default=None, description="API key for Anthropic models", ) @@ -20,13 +20,13 @@ class AnthropicProviderDataValidator(BaseModel): @json_schema_type class AnthropicConfig(BaseModel): - api_key: Optional[str] = Field( + api_key: str | None = Field( default=None, description="API key for Anthropic models", ) @classmethod - def sample_run_config(cls, api_key: str = "${env.ANTHROPIC_API_KEY}", **kwargs) -> Dict[str, Any]: + def sample_run_config(cls, api_key: str = "${env.ANTHROPIC_API_KEY}", **kwargs) -> dict[str, Any]: return { "api_key": api_key, } diff --git a/llama_stack/providers/remote/inference/bedrock/__init__.py b/llama_stack/providers/remote/inference/bedrock/__init__.py index e72c6ada9..4d98f4999 100644 --- a/llama_stack/providers/remote/inference/bedrock/__init__.py +++ b/llama_stack/providers/remote/inference/bedrock/__init__.py @@ -1,18 +1,18 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. -from .config import BedrockConfig - - -async def get_adapter_impl(config: BedrockConfig, _deps): - from .bedrock import BedrockInferenceAdapter - - assert isinstance(config, BedrockConfig), f"Unexpected config type: {type(config)}" - - impl = BedrockInferenceAdapter(config) - - await impl.initialize() - - return impl +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+from .config import BedrockConfig + + +async def get_adapter_impl(config: BedrockConfig, _deps): + from .bedrock import BedrockInferenceAdapter + + assert isinstance(config, BedrockConfig), f"Unexpected config type: {type(config)}" + + impl = BedrockInferenceAdapter(config) + + await impl.initialize() + + return impl diff --git a/llama_stack/providers/remote/inference/bedrock/bedrock.py b/llama_stack/providers/remote/inference/bedrock/bedrock.py index 0a485da8f..952d86f1a 100644 --- a/llama_stack/providers/remote/inference/bedrock/bedrock.py +++ b/llama_stack/providers/remote/inference/bedrock/bedrock.py @@ -5,7 +5,7 @@ # the root directory of this source tree. import json -from typing import AsyncGenerator, AsyncIterator, Dict, List, Optional, Union +from collections.abc import AsyncGenerator, AsyncIterator from botocore.client import BaseClient @@ -22,6 +22,7 @@ from llama_stack.apis.inference import ( Inference, LogProbConfig, Message, + OpenAIEmbeddingsResponse, ResponseFormat, SamplingParams, TextTruncation, @@ -36,10 +37,10 @@ from llama_stack.providers.utils.inference.model_registry import ( ModelRegistryHelper, ) from llama_stack.providers.utils.inference.openai_compat import ( - OpenAIChatCompletionUnsupportedMixin, + OpenAIChatCompletionToLlamaStackMixin, OpenAICompatCompletionChoice, OpenAICompatCompletionResponse, - OpenAICompletionUnsupportedMixin, + OpenAICompletionToLlamaStackMixin, get_sampling_strategy_options, process_chat_completion_response, process_chat_completion_stream_response, @@ -56,8 +57,8 @@ from .models import MODEL_ENTRIES class BedrockInferenceAdapter( ModelRegistryHelper, Inference, - OpenAIChatCompletionUnsupportedMixin, - OpenAICompletionUnsupportedMixin, + OpenAIChatCompletionToLlamaStackMixin, + OpenAICompletionToLlamaStackMixin, ): def __init__(self, config: BedrockConfig) -> None: ModelRegistryHelper.__init__(self, MODEL_ENTRIES) @@ -79,26 +80,26 @@ class BedrockInferenceAdapter( self, model_id: str, content: InterleavedContent, - sampling_params: Optional[SamplingParams] = None, - response_format: Optional[ResponseFormat] = None, - stream: Optional[bool] = False, - logprobs: Optional[LogProbConfig] = None, + sampling_params: SamplingParams | None = None, + response_format: ResponseFormat | None = None, + stream: bool | None = False, + logprobs: LogProbConfig | None = None, ) -> AsyncGenerator: raise NotImplementedError() async def chat_completion( self, model_id: str, - messages: List[Message], - sampling_params: Optional[SamplingParams] = None, - response_format: Optional[ResponseFormat] = None, - tools: Optional[List[ToolDefinition]] = None, - tool_choice: Optional[ToolChoice] = ToolChoice.auto, - tool_prompt_format: Optional[ToolPromptFormat] = None, - stream: Optional[bool] = False, - logprobs: Optional[LogProbConfig] = None, - tool_config: Optional[ToolConfig] = None, - ) -> Union[ChatCompletionResponse, AsyncIterator[ChatCompletionResponseStreamChunk]]: + messages: list[Message], + sampling_params: SamplingParams | None = None, + response_format: ResponseFormat | None = None, + tools: list[ToolDefinition] | None = None, + tool_choice: ToolChoice | None = ToolChoice.auto, + tool_prompt_format: ToolPromptFormat | None = None, + stream: bool | None = False, + logprobs: LogProbConfig | None = None, + tool_config: ToolConfig | None = None, + ) -> ChatCompletionResponse | AsyncIterator[ChatCompletionResponseStreamChunk]: if sampling_params is None: sampling_params = SamplingParams() model = await self.model_store.get_model(model_id) @@ -151,7 +152,7 
@@ class BedrockInferenceAdapter( async for chunk in process_chat_completion_stream_response(stream, request): yield chunk - async def _get_params_for_chat_completion(self, request: ChatCompletionRequest) -> Dict: + async def _get_params_for_chat_completion(self, request: ChatCompletionRequest) -> dict: bedrock_model = request.model sampling_params = request.sampling_params @@ -176,10 +177,10 @@ class BedrockInferenceAdapter( async def embeddings( self, model_id: str, - contents: List[str] | List[InterleavedContentItem], - text_truncation: Optional[TextTruncation] = TextTruncation.none, - output_dimension: Optional[int] = None, - task_type: Optional[EmbeddingTaskType] = None, + contents: list[str] | list[InterleavedContentItem], + text_truncation: TextTruncation | None = TextTruncation.none, + output_dimension: int | None = None, + task_type: EmbeddingTaskType | None = None, ) -> EmbeddingsResponse: model = await self.model_store.get_model(model_id) embeddings = [] @@ -197,3 +198,13 @@ class BedrockInferenceAdapter( response_body = json.loads(response.get("body").read()) embeddings.append(response_body.get("embedding")) return EmbeddingsResponse(embeddings=embeddings) + + async def openai_embeddings( + self, + model: str, + input: str | list[str], + encoding_format: str | None = "float", + dimensions: int | None = None, + user: str | None = None, + ) -> OpenAIEmbeddingsResponse: + raise NotImplementedError() diff --git a/llama_stack/providers/remote/inference/bedrock/config.py b/llama_stack/providers/remote/inference/bedrock/config.py index f2e8930be..5961a2f15 100644 --- a/llama_stack/providers/remote/inference/bedrock/config.py +++ b/llama_stack/providers/remote/inference/bedrock/config.py @@ -1,11 +1,11 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from llama_stack.providers.utils.bedrock.config import BedrockBaseConfig - - -class BedrockConfig(BedrockBaseConfig): - pass +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from llama_stack.providers.utils.bedrock.config import BedrockBaseConfig + + +class BedrockConfig(BedrockBaseConfig): + pass diff --git a/llama_stack/providers/remote/inference/cerebras/cerebras.py b/llama_stack/providers/remote/inference/cerebras/cerebras.py index 5e0a5b484..952118e24 100644 --- a/llama_stack/providers/remote/inference/cerebras/cerebras.py +++ b/llama_stack/providers/remote/inference/cerebras/cerebras.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from typing import AsyncGenerator, List, Optional, Union +from collections.abc import AsyncGenerator from cerebras.cloud.sdk import AsyncCerebras @@ -21,6 +21,7 @@ from llama_stack.apis.inference import ( Inference, LogProbConfig, Message, + OpenAIEmbeddingsResponse, ResponseFormat, SamplingParams, TextTruncation, @@ -34,8 +35,8 @@ from llama_stack.providers.utils.inference.model_registry import ( ModelRegistryHelper, ) from llama_stack.providers.utils.inference.openai_compat import ( - OpenAIChatCompletionUnsupportedMixin, - OpenAICompletionUnsupportedMixin, + OpenAIChatCompletionToLlamaStackMixin, + OpenAICompletionToLlamaStackMixin, get_sampling_options, process_chat_completion_response, process_chat_completion_stream_response, @@ -54,8 +55,8 @@ from .models import MODEL_ENTRIES class CerebrasInferenceAdapter( ModelRegistryHelper, Inference, - OpenAIChatCompletionUnsupportedMixin, - OpenAICompletionUnsupportedMixin, + OpenAIChatCompletionToLlamaStackMixin, + OpenAICompletionToLlamaStackMixin, ): def __init__(self, config: CerebrasImplConfig) -> None: ModelRegistryHelper.__init__( @@ -79,10 +80,10 @@ class CerebrasInferenceAdapter( self, model_id: str, content: InterleavedContent, - sampling_params: Optional[SamplingParams] = None, - response_format: Optional[ResponseFormat] = None, - stream: Optional[bool] = False, - logprobs: Optional[LogProbConfig] = None, + sampling_params: SamplingParams | None = None, + response_format: ResponseFormat | None = None, + stream: bool | None = False, + logprobs: LogProbConfig | None = None, ) -> AsyncGenerator: if sampling_params is None: sampling_params = SamplingParams() @@ -120,15 +121,15 @@ class CerebrasInferenceAdapter( async def chat_completion( self, model_id: str, - messages: List[Message], - sampling_params: Optional[SamplingParams] = None, - tools: Optional[List[ToolDefinition]] = None, - tool_choice: Optional[ToolChoice] = ToolChoice.auto, - tool_prompt_format: Optional[ToolPromptFormat] = None, - response_format: Optional[ResponseFormat] = None, - stream: Optional[bool] = False, - logprobs: Optional[LogProbConfig] = None, - tool_config: Optional[ToolConfig] = None, + messages: list[Message], + sampling_params: SamplingParams | None = None, + tools: list[ToolDefinition] | None = None, + tool_choice: ToolChoice | None = ToolChoice.auto, + tool_prompt_format: ToolPromptFormat | None = None, + response_format: ResponseFormat | None = None, + stream: bool | None = False, + logprobs: LogProbConfig | None = None, + tool_config: ToolConfig | None = None, ) -> AsyncGenerator: if sampling_params is None: sampling_params = SamplingParams() @@ -166,7 +167,7 @@ class CerebrasInferenceAdapter( async for chunk in process_chat_completion_stream_response(stream, request): yield chunk - async def _get_params(self, request: Union[ChatCompletionRequest, CompletionRequest]) -> dict: + async def _get_params(self, request: ChatCompletionRequest | CompletionRequest) -> dict: if request.sampling_params and isinstance(request.sampling_params.strategy, TopKSamplingStrategy): raise ValueError("`top_k` not supported by Cerebras") @@ -188,9 +189,19 @@ class CerebrasInferenceAdapter( async def embeddings( self, model_id: str, - contents: List[str] | List[InterleavedContentItem], - text_truncation: Optional[TextTruncation] = TextTruncation.none, - output_dimension: Optional[int] = None, - task_type: Optional[EmbeddingTaskType] = None, + contents: list[str] | list[InterleavedContentItem], + text_truncation: TextTruncation | None = TextTruncation.none, + output_dimension: 
int | None = None, + task_type: EmbeddingTaskType | None = None, ) -> EmbeddingsResponse: raise NotImplementedError() + + async def openai_embeddings( + self, + model: str, + input: str | list[str], + encoding_format: str | None = "float", + dimensions: int | None = None, + user: str | None = None, + ) -> OpenAIEmbeddingsResponse: + raise NotImplementedError() diff --git a/llama_stack/providers/remote/inference/cerebras/config.py b/llama_stack/providers/remote/inference/cerebras/config.py index 81682c980..81312ec76 100644 --- a/llama_stack/providers/remote/inference/cerebras/config.py +++ b/llama_stack/providers/remote/inference/cerebras/config.py @@ -5,7 +5,7 @@ # the root directory of this source tree. import os -from typing import Any, Dict, Optional +from typing import Any from pydantic import BaseModel, Field, SecretStr @@ -20,13 +20,13 @@ class CerebrasImplConfig(BaseModel): default=os.environ.get("CEREBRAS_BASE_URL", DEFAULT_BASE_URL), description="Base URL for the Cerebras API", ) - api_key: Optional[SecretStr] = Field( + api_key: SecretStr | None = Field( default=os.environ.get("CEREBRAS_API_KEY"), description="Cerebras API Key", ) @classmethod - def sample_run_config(cls, **kwargs) -> Dict[str, Any]: + def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { "base_url": DEFAULT_BASE_URL, "api_key": "${env.CEREBRAS_API_KEY}", diff --git a/llama_stack/providers/remote/inference/cerebras_openai_compat/__init__.py b/llama_stack/providers/remote/inference/cerebras_openai_compat/__init__.py index a5f07edd2..523a8dfe7 100644 --- a/llama_stack/providers/remote/inference/cerebras_openai_compat/__init__.py +++ b/llama_stack/providers/remote/inference/cerebras_openai_compat/__init__.py @@ -4,12 +4,12 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.inference import Inference +from llama_stack.apis.inference import InferenceProvider from .config import CerebrasCompatConfig -async def get_adapter_impl(config: CerebrasCompatConfig, _deps) -> Inference: +async def get_adapter_impl(config: CerebrasCompatConfig, _deps) -> InferenceProvider: # import dynamically so the import is used only when it is needed from .cerebras import CerebrasCompatInferenceAdapter diff --git a/llama_stack/providers/remote/inference/cerebras_openai_compat/config.py b/llama_stack/providers/remote/inference/cerebras_openai_compat/config.py index 149c0a202..cb8daff6a 100644 --- a/llama_stack/providers/remote/inference/cerebras_openai_compat/config.py +++ b/llama_stack/providers/remote/inference/cerebras_openai_compat/config.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from typing import Any, Dict, Optional +from typing import Any from pydantic import BaseModel, Field @@ -12,7 +12,7 @@ from llama_stack.schema_utils import json_schema_type class CerebrasProviderDataValidator(BaseModel): - cerebras_api_key: Optional[str] = Field( + cerebras_api_key: str | None = Field( default=None, description="API key for Cerebras models", ) @@ -20,7 +20,7 @@ class CerebrasProviderDataValidator(BaseModel): @json_schema_type class CerebrasCompatConfig(BaseModel): - api_key: Optional[str] = Field( + api_key: str | None = Field( default=None, description="The Cerebras API key", ) @@ -31,7 +31,7 @@ class CerebrasCompatConfig(BaseModel): ) @classmethod - def sample_run_config(cls, api_key: str = "${env.CEREBRAS_API_KEY}", **kwargs) -> Dict[str, Any]: + def sample_run_config(cls, api_key: str = "${env.CEREBRAS_API_KEY}", **kwargs) -> dict[str, Any]: return { "openai_compat_api_base": "https://api.cerebras.ai/v1", "api_key": api_key, diff --git a/llama_stack/providers/remote/inference/databricks/config.py b/llama_stack/providers/remote/inference/databricks/config.py index 1d51125cb..5710dcef3 100644 --- a/llama_stack/providers/remote/inference/databricks/config.py +++ b/llama_stack/providers/remote/inference/databricks/config.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict +from typing import Any from pydantic import BaseModel, Field @@ -28,7 +28,7 @@ class DatabricksImplConfig(BaseModel): url: str = "${env.DATABRICKS_URL}", api_token: str = "${env.DATABRICKS_API_TOKEN}", **kwargs: Any, - ) -> Dict[str, Any]: + ) -> dict[str, Any]: return { "url": url, "api_token": api_token, diff --git a/llama_stack/providers/remote/inference/databricks/databricks.py b/llama_stack/providers/remote/inference/databricks/databricks.py index a10878b27..1dc18b97f 100644 --- a/llama_stack/providers/remote/inference/databricks/databricks.py +++ b/llama_stack/providers/remote/inference/databricks/databricks.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from typing import AsyncGenerator, List, Optional +from collections.abc import AsyncGenerator from openai import OpenAI @@ -20,6 +20,7 @@ from llama_stack.apis.inference import ( Inference, LogProbConfig, Message, + OpenAIEmbeddingsResponse, ResponseFormat, SamplingParams, TextTruncation, @@ -34,8 +35,8 @@ from llama_stack.providers.utils.inference.model_registry import ( build_hf_repo_model_entry, ) from llama_stack.providers.utils.inference.openai_compat import ( - OpenAIChatCompletionUnsupportedMixin, - OpenAICompletionUnsupportedMixin, + OpenAIChatCompletionToLlamaStackMixin, + OpenAICompletionToLlamaStackMixin, get_sampling_options, process_chat_completion_response, process_chat_completion_stream_response, @@ -61,8 +62,8 @@ model_entries = [ class DatabricksInferenceAdapter( ModelRegistryHelper, Inference, - OpenAIChatCompletionUnsupportedMixin, - OpenAICompletionUnsupportedMixin, + OpenAIChatCompletionToLlamaStackMixin, + OpenAICompletionToLlamaStackMixin, ): def __init__(self, config: DatabricksImplConfig) -> None: ModelRegistryHelper.__init__(self, model_entries=model_entries) @@ -78,25 +79,25 @@ class DatabricksInferenceAdapter( self, model: str, content: InterleavedContent, - sampling_params: Optional[SamplingParams] = None, - response_format: Optional[ResponseFormat] = None, - stream: Optional[bool] = False, - logprobs: Optional[LogProbConfig] = None, + sampling_params: SamplingParams | None = None, + response_format: ResponseFormat | None = None, + stream: bool | None = False, + logprobs: LogProbConfig | None = None, ) -> AsyncGenerator: raise NotImplementedError() async def chat_completion( self, model: str, - messages: List[Message], - sampling_params: Optional[SamplingParams] = None, - response_format: Optional[ResponseFormat] = None, - tools: Optional[List[ToolDefinition]] = None, - tool_choice: Optional[ToolChoice] = ToolChoice.auto, - tool_prompt_format: Optional[ToolPromptFormat] = None, - stream: Optional[bool] = False, - logprobs: Optional[LogProbConfig] = None, - tool_config: Optional[ToolConfig] = None, + messages: list[Message], + sampling_params: SamplingParams | None = None, + response_format: ResponseFormat | None = None, + tools: list[ToolDefinition] | None = None, + tool_choice: ToolChoice | None = ToolChoice.auto, + tool_prompt_format: ToolPromptFormat | None = None, + stream: bool | None = False, + logprobs: LogProbConfig | None = None, + tool_config: ToolConfig | None = None, ) -> AsyncGenerator: if sampling_params is None: sampling_params = SamplingParams() @@ -146,9 +147,19 @@ class DatabricksInferenceAdapter( async def embeddings( self, model_id: str, - contents: List[str] | List[InterleavedContentItem], - text_truncation: Optional[TextTruncation] = TextTruncation.none, - output_dimension: Optional[int] = None, - task_type: Optional[EmbeddingTaskType] = None, + contents: list[str] | list[InterleavedContentItem], + text_truncation: TextTruncation | None = TextTruncation.none, + output_dimension: int | None = None, + task_type: EmbeddingTaskType | None = None, ) -> EmbeddingsResponse: raise NotImplementedError() + + async def openai_embeddings( + self, + model: str, + input: str | list[str], + encoding_format: str | None = "float", + dimensions: int | None = None, + user: str | None = None, + ) -> OpenAIEmbeddingsResponse: + raise NotImplementedError() diff --git a/llama_stack/providers/remote/inference/fireworks/config.py b/llama_stack/providers/remote/inference/fireworks/config.py index c21ce4a40..072d558f4 100644 --- 
a/llama_stack/providers/remote/inference/fireworks/config.py +++ b/llama_stack/providers/remote/inference/fireworks/config.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict, Optional +from typing import Any from pydantic import BaseModel, Field, SecretStr @@ -17,13 +17,13 @@ class FireworksImplConfig(BaseModel): default="https://api.fireworks.ai/inference/v1", description="The URL for the Fireworks server", ) - api_key: Optional[SecretStr] = Field( + api_key: SecretStr | None = Field( default=None, description="The Fireworks.ai API Key", ) @classmethod - def sample_run_config(cls, api_key: str = "${env.FIREWORKS_API_KEY}", **kwargs) -> Dict[str, Any]: + def sample_run_config(cls, api_key: str = "${env.FIREWORKS_API_KEY}", **kwargs) -> dict[str, Any]: return { "url": "https://api.fireworks.ai/inference/v1", "api_key": api_key, diff --git a/llama_stack/providers/remote/inference/fireworks/fireworks.py b/llama_stack/providers/remote/inference/fireworks/fireworks.py index b59e9f2cb..fe21685dd 100644 --- a/llama_stack/providers/remote/inference/fireworks/fireworks.py +++ b/llama_stack/providers/remote/inference/fireworks/fireworks.py @@ -4,7 +4,8 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, AsyncGenerator, Dict, List, Optional, Union +from collections.abc import AsyncGenerator, AsyncIterator +from typing import Any from fireworks.client import Fireworks from openai import AsyncOpenAI @@ -32,13 +33,21 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.apis.inference.inference import OpenAIChatCompletion, OpenAICompletion, OpenAIMessageParam +from llama_stack.apis.inference.inference import ( + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, + OpenAIEmbeddingsResponse, + OpenAIMessageParam, + OpenAIResponseFormatParam, +) from llama_stack.distribution.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger from llama_stack.providers.utils.inference.model_registry import ( ModelRegistryHelper, ) from llama_stack.providers.utils.inference.openai_compat import ( + OpenAIChatCompletionToLlamaStackMixin, convert_message_to_openai_dict, get_sampling_options, prepare_openai_completion_params, @@ -98,10 +107,10 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProv self, model_id: str, content: InterleavedContent, - sampling_params: Optional[SamplingParams] = None, - response_format: Optional[ResponseFormat] = None, - stream: Optional[bool] = False, - logprobs: Optional[LogProbConfig] = None, + sampling_params: SamplingParams | None = None, + response_format: ResponseFormat | None = None, + stream: bool | None = False, + logprobs: LogProbConfig | None = None, ) -> AsyncGenerator: if sampling_params is None: sampling_params = SamplingParams() @@ -139,9 +148,9 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProv def _build_options( self, - sampling_params: Optional[SamplingParams], + sampling_params: SamplingParams | None, fmt: ResponseFormat, - logprobs: Optional[LogProbConfig], + logprobs: LogProbConfig | None, ) -> dict: options = get_sampling_options(sampling_params) options.setdefault("max_tokens", 512) @@ -170,15 +179,15 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProv async def 
chat_completion( self, model_id: str, - messages: List[Message], - sampling_params: Optional[SamplingParams] = None, - tools: Optional[List[ToolDefinition]] = None, - tool_choice: Optional[ToolChoice] = ToolChoice.auto, - tool_prompt_format: Optional[ToolPromptFormat] = None, - response_format: Optional[ResponseFormat] = None, - stream: Optional[bool] = False, - logprobs: Optional[LogProbConfig] = None, - tool_config: Optional[ToolConfig] = None, + messages: list[Message], + sampling_params: SamplingParams | None = None, + tools: list[ToolDefinition] | None = None, + tool_choice: ToolChoice | None = ToolChoice.auto, + tool_prompt_format: ToolPromptFormat | None = None, + response_format: ResponseFormat | None = None, + stream: bool | None = False, + logprobs: LogProbConfig | None = None, + tool_config: ToolConfig | None = None, ) -> AsyncGenerator: if sampling_params is None: sampling_params = SamplingParams() @@ -222,7 +231,7 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProv async for chunk in process_chat_completion_stream_response(stream, request): yield chunk - async def _get_params(self, request: Union[ChatCompletionRequest, CompletionRequest]) -> dict: + async def _get_params(self, request: ChatCompletionRequest | CompletionRequest) -> dict: input_dict = {} media_present = request_has_media(request) @@ -256,10 +265,10 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProv async def embeddings( self, model_id: str, - contents: List[str] | List[InterleavedContentItem], - text_truncation: Optional[TextTruncation] = TextTruncation.none, - output_dimension: Optional[int] = None, - task_type: Optional[EmbeddingTaskType] = None, + contents: list[str] | list[InterleavedContentItem], + text_truncation: TextTruncation | None = TextTruncation.none, + output_dimension: int | None = None, + task_type: EmbeddingTaskType | None = None, ) -> EmbeddingsResponse: model = await self.model_store.get_model(model_id) @@ -278,29 +287,44 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProv embeddings = [data.embedding for data in response.data] return EmbeddingsResponse(embeddings=embeddings) + async def openai_embeddings( + self, + model: str, + input: str | list[str], + encoding_format: str | None = "float", + dimensions: int | None = None, + user: str | None = None, + ) -> OpenAIEmbeddingsResponse: + raise NotImplementedError() + async def openai_completion( self, model: str, - prompt: Union[str, List[str], List[int], List[List[int]]], - best_of: Optional[int] = None, - echo: Optional[bool] = None, - frequency_penalty: Optional[float] = None, - logit_bias: Optional[Dict[str, float]] = None, - logprobs: Optional[bool] = None, - max_tokens: Optional[int] = None, - n: Optional[int] = None, - presence_penalty: Optional[float] = None, - seed: Optional[int] = None, - stop: Optional[Union[str, List[str]]] = None, - stream: Optional[bool] = None, - stream_options: Optional[Dict[str, Any]] = None, - temperature: Optional[float] = None, - top_p: Optional[float] = None, - user: Optional[str] = None, - guided_choice: Optional[List[str]] = None, - prompt_logprobs: Optional[int] = None, + prompt: str | list[str] | list[int] | list[list[int]], + best_of: int | None = None, + echo: bool | None = None, + frequency_penalty: float | None = None, + logit_bias: dict[str, float] | None = None, + logprobs: bool | None = None, + max_tokens: int | None = None, + n: int | None = None, + presence_penalty: float | None = None, + seed: int 
| None = None, + stop: str | list[str] | None = None, + stream: bool | None = None, + stream_options: dict[str, Any] | None = None, + temperature: float | None = None, + top_p: float | None = None, + user: str | None = None, + guided_choice: list[str] | None = None, + prompt_logprobs: int | None = None, ) -> OpenAICompletion: model_obj = await self.model_store.get_model(model) + + # Fireworks always prepends with BOS + if isinstance(prompt, str) and prompt.startswith("<|begin_of_text|>"): + prompt = prompt[len("<|begin_of_text|>") :] + params = await prepare_openai_completion_params( model=model_obj.provider_resource_id, prompt=prompt, @@ -320,37 +344,70 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProv top_p=top_p, user=user, ) + return await self._get_openai_client().completions.create(**params) async def openai_chat_completion( self, model: str, - messages: List[OpenAIMessageParam], - frequency_penalty: Optional[float] = None, - function_call: Optional[Union[str, Dict[str, Any]]] = None, - functions: Optional[List[Dict[str, Any]]] = None, - logit_bias: Optional[Dict[str, float]] = None, - logprobs: Optional[bool] = None, - max_completion_tokens: Optional[int] = None, - max_tokens: Optional[int] = None, - n: Optional[int] = None, - parallel_tool_calls: Optional[bool] = None, - presence_penalty: Optional[float] = None, - response_format: Optional[Dict[str, str]] = None, - seed: Optional[int] = None, - stop: Optional[Union[str, List[str]]] = None, - stream: Optional[bool] = None, - stream_options: Optional[Dict[str, Any]] = None, - temperature: Optional[float] = None, - tool_choice: Optional[Union[str, Dict[str, Any]]] = None, - tools: Optional[List[Dict[str, Any]]] = None, - top_logprobs: Optional[int] = None, - top_p: Optional[float] = None, - user: Optional[str] = None, - ) -> OpenAIChatCompletion: + messages: list[OpenAIMessageParam], + frequency_penalty: float | None = None, + function_call: str | dict[str, Any] | None = None, + functions: list[dict[str, Any]] | None = None, + logit_bias: dict[str, float] | None = None, + logprobs: bool | None = None, + max_completion_tokens: int | None = None, + max_tokens: int | None = None, + n: int | None = None, + parallel_tool_calls: bool | None = None, + presence_penalty: float | None = None, + response_format: OpenAIResponseFormatParam | None = None, + seed: int | None = None, + stop: str | list[str] | None = None, + stream: bool | None = None, + stream_options: dict[str, Any] | None = None, + temperature: float | None = None, + tool_choice: str | dict[str, Any] | None = None, + tools: list[dict[str, Any]] | None = None, + top_logprobs: int | None = None, + top_p: float | None = None, + user: str | None = None, + ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]: model_obj = await self.model_store.get_model(model) + + # Divert Llama Models through Llama Stack inference APIs because + # Fireworks chat completions OpenAI-compatible API does not support + # tool calls properly. 
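+        # Non-Llama models fall through to Fireworks' OpenAI-compatible endpoint below.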
+ llama_model = self.get_llama_model(model_obj.provider_resource_id) + if llama_model: + return await OpenAIChatCompletionToLlamaStackMixin.openai_chat_completion( + self, + model=model, + messages=messages, + frequency_penalty=frequency_penalty, + function_call=function_call, + functions=functions, + logit_bias=logit_bias, + logprobs=logprobs, + max_completion_tokens=max_completion_tokens, + max_tokens=max_tokens, + n=n, + parallel_tool_calls=parallel_tool_calls, + presence_penalty=presence_penalty, + response_format=response_format, + seed=seed, + stop=stop, + stream=stream, + stream_options=stream_options, + temperature=temperature, + tool_choice=tool_choice, + tools=tools, + top_logprobs=top_logprobs, + top_p=top_p, + user=user, + ) + params = await prepare_openai_completion_params( - model=model_obj.provider_resource_id, messages=messages, frequency_penalty=frequency_penalty, function_call=function_call, @@ -374,4 +431,5 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProv top_p=top_p, user=user, ) - return await self._get_openai_client().chat.completions.create(**params) + + return await self._get_openai_client().chat.completions.create(model=model_obj.provider_resource_id, **params) diff --git a/llama_stack/providers/remote/inference/fireworks_openai_compat/__init__.py b/llama_stack/providers/remote/inference/fireworks_openai_compat/__init__.py index f78f218b5..15a666cb6 100644 --- a/llama_stack/providers/remote/inference/fireworks_openai_compat/__init__.py +++ b/llama_stack/providers/remote/inference/fireworks_openai_compat/__init__.py @@ -4,12 +4,12 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.inference import Inference +from llama_stack.apis.inference import InferenceProvider from .config import FireworksCompatConfig -async def get_adapter_impl(config: FireworksCompatConfig, _deps) -> Inference: +async def get_adapter_impl(config: FireworksCompatConfig, _deps) -> InferenceProvider: # import dynamically so the import is used only when it is needed from .fireworks import FireworksCompatInferenceAdapter diff --git a/llama_stack/providers/remote/inference/fireworks_openai_compat/config.py b/llama_stack/providers/remote/inference/fireworks_openai_compat/config.py index 0263d348a..bf38cdd2b 100644 --- a/llama_stack/providers/remote/inference/fireworks_openai_compat/config.py +++ b/llama_stack/providers/remote/inference/fireworks_openai_compat/config.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from typing import Any, Dict, Optional +from typing import Any from pydantic import BaseModel, Field @@ -12,7 +12,7 @@ from llama_stack.schema_utils import json_schema_type class FireworksProviderDataValidator(BaseModel): - fireworks_api_key: Optional[str] = Field( + fireworks_api_key: str | None = Field( default=None, description="API key for Fireworks models", ) @@ -20,7 +20,7 @@ class FireworksProviderDataValidator(BaseModel): @json_schema_type class FireworksCompatConfig(BaseModel): - api_key: Optional[str] = Field( + api_key: str | None = Field( default=None, description="The Fireworks API key", ) @@ -31,7 +31,7 @@ class FireworksCompatConfig(BaseModel): ) @classmethod - def sample_run_config(cls, api_key: str = "${env.FIREWORKS_API_KEY}", **kwargs) -> Dict[str, Any]: + def sample_run_config(cls, api_key: str = "${env.FIREWORKS_API_KEY}", **kwargs) -> dict[str, Any]: return { "openai_compat_api_base": "https://api.fireworks.ai/inference/v1", "api_key": api_key, diff --git a/llama_stack/providers/remote/inference/gemini/__init__.py b/llama_stack/providers/remote/inference/gemini/__init__.py index dd972f21c..9d35da893 100644 --- a/llama_stack/providers/remote/inference/gemini/__init__.py +++ b/llama_stack/providers/remote/inference/gemini/__init__.py @@ -4,15 +4,13 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Optional - from pydantic import BaseModel from .config import GeminiConfig class GeminiProviderDataValidator(BaseModel): - gemini_api_key: Optional[str] = None + gemini_api_key: str | None = None async def get_adapter_impl(config: GeminiConfig, _deps): diff --git a/llama_stack/providers/remote/inference/gemini/config.py b/llama_stack/providers/remote/inference/gemini/config.py index 30c8d9913..63ef4de01 100644 --- a/llama_stack/providers/remote/inference/gemini/config.py +++ b/llama_stack/providers/remote/inference/gemini/config.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict, Optional +from typing import Any from pydantic import BaseModel, Field @@ -12,7 +12,7 @@ from llama_stack.schema_utils import json_schema_type class GeminiProviderDataValidator(BaseModel): - gemini_api_key: Optional[str] = Field( + gemini_api_key: str | None = Field( default=None, description="API key for Gemini models", ) @@ -20,13 +20,13 @@ class GeminiProviderDataValidator(BaseModel): @json_schema_type class GeminiConfig(BaseModel): - api_key: Optional[str] = Field( + api_key: str | None = Field( default=None, description="API key for Gemini models", ) @classmethod - def sample_run_config(cls, api_key: str = "${env.GEMINI_API_KEY}", **kwargs) -> Dict[str, Any]: + def sample_run_config(cls, api_key: str = "${env.GEMINI_API_KEY}", **kwargs) -> dict[str, Any]: return { "api_key": api_key, } diff --git a/llama_stack/providers/remote/inference/groq/config.py b/llama_stack/providers/remote/inference/groq/config.py index 8a1204b0b..fe060507a 100644 --- a/llama_stack/providers/remote/inference/groq/config.py +++ b/llama_stack/providers/remote/inference/groq/config.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from typing import Any, Dict, Optional +from typing import Any from pydantic import BaseModel, Field @@ -12,7 +12,7 @@ from llama_stack.schema_utils import json_schema_type class GroqProviderDataValidator(BaseModel): - groq_api_key: Optional[str] = Field( + groq_api_key: str | None = Field( default=None, description="API key for Groq models", ) @@ -20,7 +20,7 @@ class GroqProviderDataValidator(BaseModel): @json_schema_type class GroqConfig(BaseModel): - api_key: Optional[str] = Field( + api_key: str | None = Field( # The Groq client library loads the GROQ_API_KEY environment variable by default default=None, description="The Groq API key", @@ -32,7 +32,7 @@ class GroqConfig(BaseModel): ) @classmethod - def sample_run_config(cls, api_key: str = "${env.GROQ_API_KEY}", **kwargs) -> Dict[str, Any]: + def sample_run_config(cls, api_key: str = "${env.GROQ_API_KEY}", **kwargs) -> dict[str, Any]: return { "url": "https://api.groq.com", "api_key": api_key, diff --git a/llama_stack/providers/remote/inference/groq/groq.py b/llama_stack/providers/remote/inference/groq/groq.py index c8789434f..27d7d7961 100644 --- a/llama_stack/providers/remote/inference/groq/groq.py +++ b/llama_stack/providers/remote/inference/groq/groq.py @@ -4,8 +4,25 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +from collections.abc import AsyncIterator +from typing import Any + +from openai import AsyncOpenAI + +from llama_stack.apis.inference.inference import ( + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAIChoiceDelta, + OpenAIChunkChoice, + OpenAIMessageParam, + OpenAIResponseFormatParam, + OpenAISystemMessageParam, +) from llama_stack.providers.remote.inference.groq.config import GroqConfig from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin +from llama_stack.providers.utils.inference.openai_compat import ( + prepare_openai_completion_params, +) from .models import MODEL_ENTRIES @@ -21,9 +38,129 @@ class GroqInferenceAdapter(LiteLLMOpenAIMixin): provider_data_api_key_field="groq_api_key", ) self.config = config + self._openai_client = None async def initialize(self): await super().initialize() async def shutdown(self): await super().shutdown() + if self._openai_client: + await self._openai_client.close() + self._openai_client = None + + def _get_openai_client(self) -> AsyncOpenAI: + if not self._openai_client: + self._openai_client = AsyncOpenAI( + base_url=f"{self.config.url}/openai/v1", + api_key=self.config.api_key, + ) + return self._openai_client + + async def openai_chat_completion( + self, + model: str, + messages: list[OpenAIMessageParam], + frequency_penalty: float | None = None, + function_call: str | dict[str, Any] | None = None, + functions: list[dict[str, Any]] | None = None, + logit_bias: dict[str, float] | None = None, + logprobs: bool | None = None, + max_completion_tokens: int | None = None, + max_tokens: int | None = None, + n: int | None = None, + parallel_tool_calls: bool | None = None, + presence_penalty: float | None = None, + response_format: OpenAIResponseFormatParam | None = None, + seed: int | None = None, + stop: str | list[str] | None = None, + stream: bool | None = None, + stream_options: dict[str, Any] | None = None, + temperature: float | None = None, + tool_choice: str | dict[str, Any] | None = None, + tools: list[dict[str, Any]] | None = None, + top_logprobs: int | None = None, + top_p: float | None = None, + user: str | None = None, + ) -> OpenAIChatCompletion | 
AsyncIterator[OpenAIChatCompletionChunk]: + model_obj = await self.model_store.get_model(model) + + # Groq does not support json_schema response format, so we need to convert it to json_object + if response_format and response_format.type == "json_schema": + response_format.type = "json_object" + schema = response_format.json_schema.get("schema", {}) + response_format.json_schema = None + json_instructions = f"\nYour response should be a JSON object that matches the following schema: {schema}" + if messages and messages[0].role == "system": + messages[0].content = messages[0].content + json_instructions + else: + messages.insert(0, OpenAISystemMessageParam(content=json_instructions)) + + # Groq returns a 400 error if tools are provided but none are called + # So, set tool_choice to "required" to attempt to force a call + if tools and (not tool_choice or tool_choice == "auto"): + tool_choice = "required" + + params = await prepare_openai_completion_params( + model=model_obj.provider_resource_id.replace("groq/", ""), + messages=messages, + frequency_penalty=frequency_penalty, + function_call=function_call, + functions=functions, + logit_bias=logit_bias, + logprobs=logprobs, + max_completion_tokens=max_completion_tokens, + max_tokens=max_tokens, + n=n, + parallel_tool_calls=parallel_tool_calls, + presence_penalty=presence_penalty, + response_format=response_format, + seed=seed, + stop=stop, + stream=stream, + stream_options=stream_options, + temperature=temperature, + tool_choice=tool_choice, + tools=tools, + top_logprobs=top_logprobs, + top_p=top_p, + user=user, + ) + + # Groq does not support streaming requests that set response_format + fake_stream = False + if stream and response_format: + params["stream"] = False + fake_stream = True + + response = await self._get_openai_client().chat.completions.create(**params) + + if fake_stream: + chunk_choices = [] + for choice in response.choices: + delta = OpenAIChoiceDelta( + content=choice.message.content, + role=choice.message.role, + tool_calls=choice.message.tool_calls, + ) + chunk_choice = OpenAIChunkChoice( + delta=delta, + finish_reason=choice.finish_reason, + index=choice.index, + logprobs=None, + ) + chunk_choices.append(chunk_choice) + chunk = OpenAIChatCompletionChunk( + id=response.id, + choices=chunk_choices, + object="chat.completion.chunk", + created=response.created, + model=response.model, + ) + + async def _fake_stream_generator(): + yield chunk + + return _fake_stream_generator() + else: + return response diff --git a/llama_stack/providers/remote/inference/groq/models.py b/llama_stack/providers/remote/inference/groq/models.py index d0c10ca62..0b4b81cfe 100644 --- a/llama_stack/providers/remote/inference/groq/models.py +++ b/llama_stack/providers/remote/inference/groq/models.py @@ -39,8 +39,16 @@ MODEL_ENTRIES = [ "groq/llama-4-scout-17b-16e-instruct", CoreModelId.llama4_scout_17b_16e_instruct.value, ), + build_hf_repo_model_entry( + "groq/meta-llama/llama-4-scout-17b-16e-instruct", + CoreModelId.llama4_scout_17b_16e_instruct.value, + ), build_hf_repo_model_entry( "groq/llama-4-maverick-17b-128e-instruct", CoreModelId.llama4_maverick_17b_128e_instruct.value, ), + build_hf_repo_model_entry( + "groq/meta-llama/llama-4-maverick-17b-128e-instruct", + CoreModelId.llama4_maverick_17b_128e_instruct.value, + ), ] diff --git a/llama_stack/providers/remote/inference/groq_openai_compat/__init__.py b/llama_stack/providers/remote/inference/groq_openai_compat/__init__.py index 8161df20d..794cdebd7 100644 --- 
a/llama_stack/providers/remote/inference/groq_openai_compat/__init__.py +++ b/llama_stack/providers/remote/inference/groq_openai_compat/__init__.py @@ -4,12 +4,12 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.inference import Inference +from llama_stack.apis.inference import InferenceProvider from .config import GroqCompatConfig -async def get_adapter_impl(config: GroqCompatConfig, _deps) -> Inference: +async def get_adapter_impl(config: GroqCompatConfig, _deps) -> InferenceProvider: # import dynamically so the import is used only when it is needed from .groq import GroqCompatInferenceAdapter diff --git a/llama_stack/providers/remote/inference/groq_openai_compat/config.py b/llama_stack/providers/remote/inference/groq_openai_compat/config.py index 4b90b4576..481f740f9 100644 --- a/llama_stack/providers/remote/inference/groq_openai_compat/config.py +++ b/llama_stack/providers/remote/inference/groq_openai_compat/config.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict, Optional +from typing import Any from pydantic import BaseModel, Field @@ -12,7 +12,7 @@ from llama_stack.schema_utils import json_schema_type class GroqProviderDataValidator(BaseModel): - groq_api_key: Optional[str] = Field( + groq_api_key: str | None = Field( default=None, description="API key for Groq models", ) @@ -20,7 +20,7 @@ class GroqProviderDataValidator(BaseModel): @json_schema_type class GroqCompatConfig(BaseModel): - api_key: Optional[str] = Field( + api_key: str | None = Field( default=None, description="The Groq API key", ) @@ -31,7 +31,7 @@ class GroqCompatConfig(BaseModel): ) @classmethod - def sample_run_config(cls, api_key: str = "${env.GROQ_API_KEY}", **kwargs) -> Dict[str, Any]: + def sample_run_config(cls, api_key: str = "${env.GROQ_API_KEY}", **kwargs) -> dict[str, Any]: return { "openai_compat_api_base": "https://api.groq.com/openai/v1", "api_key": api_key, diff --git a/llama_stack/providers/remote/inference/llama_openai_compat/__init__.py b/llama_stack/providers/remote/inference/llama_openai_compat/__init__.py new file mode 100644 index 000000000..be48d1067 --- /dev/null +++ b/llama_stack/providers/remote/inference/llama_openai_compat/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from llama_stack.apis.inference import InferenceProvider + +from .config import LlamaCompatConfig + + +async def get_adapter_impl(config: LlamaCompatConfig, _deps) -> InferenceProvider: + # import dynamically so the import is used only when it is needed + from .llama import LlamaCompatInferenceAdapter + + adapter = LlamaCompatInferenceAdapter(config) + return adapter diff --git a/llama_stack/providers/remote/inference/llama_openai_compat/config.py b/llama_stack/providers/remote/inference/llama_openai_compat/config.py new file mode 100644 index 000000000..57bc7240d --- /dev/null +++ b/llama_stack/providers/remote/inference/llama_openai_compat/config.py @@ -0,0 +1,38 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from typing import Any + +from pydantic import BaseModel, Field + +from llama_stack.schema_utils import json_schema_type + + +class LlamaProviderDataValidator(BaseModel): + llama_api_key: str | None = Field( + default=None, + description="API key for api.llama models", + ) + + +@json_schema_type +class LlamaCompatConfig(BaseModel): + api_key: str | None = Field( + default=None, + description="The Llama API key", + ) + + openai_compat_api_base: str = Field( + default="https://api.llama.com/compat/v1/", + description="The URL for the Llama API server", + ) + + @classmethod + def sample_run_config(cls, api_key: str = "${env.LLAMA_API_KEY}", **kwargs) -> dict[str, Any]: + return { + "openai_compat_api_base": "https://api.llama.com/compat/v1/", + "api_key": api_key, + } diff --git a/llama_stack/providers/remote/inference/llama_openai_compat/llama.py b/llama_stack/providers/remote/inference/llama_openai_compat/llama.py new file mode 100644 index 000000000..29b5e889a --- /dev/null +++ b/llama_stack/providers/remote/inference/llama_openai_compat/llama.py @@ -0,0 +1,34 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from llama_stack.providers.remote.inference.llama_openai_compat.config import ( + LlamaCompatConfig, +) +from llama_stack.providers.utils.inference.litellm_openai_mixin import ( + LiteLLMOpenAIMixin, +) + +from .models import MODEL_ENTRIES + + +class LlamaCompatInferenceAdapter(LiteLLMOpenAIMixin): + _config: LlamaCompatConfig + + def __init__(self, config: LlamaCompatConfig): + LiteLLMOpenAIMixin.__init__( + self, + model_entries=MODEL_ENTRIES, + api_key_from_config=config.api_key, + provider_data_api_key_field="llama_api_key", + openai_compat_api_base=config.openai_compat_api_base, + ) + self.config = config + + async def initialize(self): + await super().initialize() + + async def shutdown(self): + await super().shutdown() diff --git a/llama_stack/providers/remote/inference/llama_openai_compat/models.py b/llama_stack/providers/remote/inference/llama_openai_compat/models.py new file mode 100644 index 000000000..6285e98e1 --- /dev/null +++ b/llama_stack/providers/remote/inference/llama_openai_compat/models.py @@ -0,0 +1,25 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from llama_stack.models.llama.sku_types import CoreModelId +from llama_stack.providers.utils.inference.model_registry import ( + build_hf_repo_model_entry, +) + +MODEL_ENTRIES = [ + build_hf_repo_model_entry( + "Llama-3.3-70B-Instruct", + CoreModelId.llama3_3_70b_instruct.value, + ), + build_hf_repo_model_entry( + "Llama-4-Scout-17B-16E-Instruct-FP8", + CoreModelId.llama4_scout_17b_16e_instruct.value, + ), + build_hf_repo_model_entry( + "Llama-4-Maverick-17B-128E-Instruct-FP8", + CoreModelId.llama4_maverick_17b_128e_instruct.value, + ), +] diff --git a/llama_stack/providers/remote/inference/nvidia/NVIDIA.md b/llama_stack/providers/remote/inference/nvidia/NVIDIA.md new file mode 100644 index 000000000..a353c67f5 --- /dev/null +++ b/llama_stack/providers/remote/inference/nvidia/NVIDIA.md @@ -0,0 +1,85 @@ +# NVIDIA Inference Provider for LlamaStack + +This provider enables running inference using NVIDIA NIM. 
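+
+In addition to the non-streaming calls shown in the sections below, chat completions can also be streamed. The snippet that follows is an illustrative sketch rather than part of the provider's reference docs: it assumes a `client` initialized as in "Initialize the client" below and a registered `meta-llama/Llama-3.1-8b-Instruct` model, and it deliberately does not assume the exact delta fields of each streamed chunk, which may vary across client versions.
+
+```python
+# Illustrative sketch: stream a chat completion with the client set up below.
+for chunk in client.chat_completion(
+    model_id="meta-llama/Llama-3.1-8b-Instruct",
+    messages=[{"role": "user", "content": "Write one sentence about GPUs"}],
+    stream=True,
+    sampling_params={"max_tokens": 50},
+):
+    # Print each incremental chunk as it arrives; inspect it to see the
+    # delta structure exposed by your client version.
+    print(chunk)
+```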
+
+## Features
+- Endpoints for completions, chat completions, and embeddings for registered models
+
+## Getting Started
+
+### Prerequisites
+
+- LlamaStack with NVIDIA configuration
+- Access to an NVIDIA NIM deployment
+- A deployed NIM serving the model you want to use for inference
+
+### Setup
+
+Build the NVIDIA environment:
+
+```bash
+llama stack build --template nvidia --image-type conda
+```
+
+### Basic Usage with the LlamaStack Python Client
+
+#### Initialize the client
+
+```python
+import os
+
+os.environ["NVIDIA_API_KEY"] = (
+    ""  # Required if using hosted NIM endpoint. If self-hosted, not required.
+)
+os.environ["NVIDIA_BASE_URL"] = "http://nim.test"  # NIM URL
+
+from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
+
+client = LlamaStackAsLibraryClient("nvidia")
+client.initialize()
+```
+
+### Create Completion
+
+```python
+response = client.completion(
+    model_id="meta-llama/Llama-3.1-8b-Instruct",
+    content="Complete the sentence using one word: Roses are red, violets are:",
+    stream=False,
+    sampling_params={
+        "max_tokens": 50,
+    },
+)
+print(f"Response: {response.content}")
+```
+
+### Create Chat Completion
+
+```python
+response = client.chat_completion(
+    model_id="meta-llama/Llama-3.1-8b-Instruct",
+    messages=[
+        {
+            "role": "system",
+            "content": "You must respond to each message with only one word",
+        },
+        {
+            "role": "user",
+            "content": "Complete the sentence using one word: Roses are red, violets are:",
+        },
+    ],
+    stream=False,
+    sampling_params={
+        "max_tokens": 50,
+    },
+)
+print(f"Response: {response.completion_message.content}")
+```
+
+### Create Embeddings
+
+```python
+response = client.embeddings(
+    model_id="meta-llama/Llama-3.1-8b-Instruct", contents=["foo", "bar", "baz"]
+)
+print(f"Embeddings: {response.embeddings}")
+```
diff --git a/llama_stack/providers/remote/inference/nvidia/config.py b/llama_stack/providers/remote/inference/nvidia/config.py
index abd34b498..4c449edc2 100644
--- a/llama_stack/providers/remote/inference/nvidia/config.py
+++ b/llama_stack/providers/remote/inference/nvidia/config.py
@@ -5,7 +5,7 @@
 # the root directory of this source tree.
 
 import os
-from typing import Any, Dict, Optional
+from typing import Any
 
 from pydantic import BaseModel, Field, SecretStr
 
@@ -39,7 +39,7 @@ class NVIDIAConfig(BaseModel):
         default_factory=lambda: os.getenv("NVIDIA_BASE_URL", "https://integrate.api.nvidia.com"),
         description="A base url for accessing the NVIDIA NIM",
     )
-    api_key: Optional[SecretStr] = Field(
+    api_key: SecretStr | None = Field(
         default_factory=lambda: os.getenv("NVIDIA_API_KEY"),
         description="The NVIDIA API key, only needed of using the hosted service",
     )
@@ -47,10 +47,15 @@
         default=60,
         description="Timeout for the HTTP requests",
     )
+    append_api_version: bool = Field(
+        default_factory=lambda: os.getenv("NVIDIA_APPEND_API_VERSION", "True").lower() != "false",
+        description="When set to false, the API version will not be appended to the base_url.
By default, it is true.", + ) @classmethod - def sample_run_config(cls, **kwargs) -> Dict[str, Any]: + def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { "url": "${env.NVIDIA_BASE_URL:https://integrate.api.nvidia.com}", "api_key": "${env.NVIDIA_API_KEY:}", + "append_api_version": "${env.NVIDIA_APPEND_API_VERSION:True}", } diff --git a/llama_stack/providers/remote/inference/nvidia/models.py b/llama_stack/providers/remote/inference/nvidia/models.py index 964125148..127a6ca59 100644 --- a/llama_stack/providers/remote/inference/nvidia/models.py +++ b/llama_stack/providers/remote/inference/nvidia/models.py @@ -48,6 +48,10 @@ MODEL_ENTRIES = [ "meta/llama-3.2-90b-vision-instruct", CoreModelId.llama3_2_90b_vision_instruct.value, ), + build_hf_repo_model_entry( + "meta/llama-3.3-70b-instruct", + CoreModelId.llama3_3_70b_instruct.value, + ), # NeMo Retriever Text Embedding models - # # https://docs.nvidia.com/nim/nemo-retriever/text-embedding/latest/support-matrix.html diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index d6f717719..4c68322e0 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -6,8 +6,9 @@ import logging import warnings +from collections.abc import AsyncIterator from functools import lru_cache -from typing import Any, AsyncIterator, Dict, List, Optional, Union +from typing import Any from openai import APIConnectionError, AsyncOpenAI, BadRequestError @@ -28,15 +29,25 @@ from llama_stack.apis.inference import ( Inference, LogProbConfig, Message, + OpenAIEmbeddingsResponse, ResponseFormat, SamplingParams, TextTruncation, ToolChoice, ToolConfig, - ToolDefinition, ) -from llama_stack.apis.inference.inference import OpenAIChatCompletion, OpenAICompletion, OpenAIMessageParam -from llama_stack.models.llama.datatypes import ToolPromptFormat +from llama_stack.apis.inference.inference import ( + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, + OpenAIMessageParam, + OpenAIResponseFormatParam, +) +from llama_stack.apis.models import Model, ModelType +from llama_stack.models.llama.datatypes import ToolDefinition, ToolPromptFormat +from llama_stack.providers.utils.inference import ( + ALL_HUGGINGFACE_REPOS_TO_MODEL_DESCRIPTOR, +) from llama_stack.providers.utils.inference.model_registry import ( ModelRegistryHelper, ) @@ -114,21 +125,29 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper): "meta/llama-3.2-90b-vision-instruct": "https://ai.api.nvidia.com/v1/gr/meta/llama-3.2-90b-vision-instruct", } - base_url = f"{self._config.url}/v1" + base_url = f"{self._config.url}/v1" if self._config.append_api_version else self._config.url + if _is_nvidia_hosted(self._config) and provider_model_id in special_model_urls: base_url = special_model_urls[provider_model_id] - return _get_client_for_base_url(base_url) + async def _get_provider_model_id(self, model_id: str) -> str: + if not self.model_store: + raise RuntimeError("Model store is not set") + model = await self.model_store.get_model(model_id) + if model is None: + raise ValueError(f"Model {model_id} is unknown") + return model.provider_model_id + async def completion( self, model_id: str, content: InterleavedContent, - sampling_params: Optional[SamplingParams] = None, - response_format: Optional[ResponseFormat] = None, - stream: Optional[bool] = False, - logprobs: Optional[LogProbConfig] = None, - ) -> Union[CompletionResponse, 
AsyncIterator[CompletionResponseStreamChunk]]: + sampling_params: SamplingParams | None = None, + response_format: ResponseFormat | None = None, + stream: bool | None = False, + logprobs: LogProbConfig | None = None, + ) -> CompletionResponse | AsyncIterator[CompletionResponseStreamChunk]: if sampling_params is None: sampling_params = SamplingParams() if content_has_media(content): @@ -138,7 +157,7 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper): # removing this health check as NeMo customizer endpoint health check is returning 404 # await check_health(self._config) # this raises errors - provider_model_id = self.get_provider_model_id(model_id) + provider_model_id = await self._get_provider_model_id(model_id) request = convert_completion_request( request=CompletionRequest( model=provider_model_id, @@ -165,24 +184,24 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper): async def embeddings( self, model_id: str, - contents: List[str] | List[InterleavedContentItem], - text_truncation: Optional[TextTruncation] = TextTruncation.none, - output_dimension: Optional[int] = None, - task_type: Optional[EmbeddingTaskType] = None, + contents: list[str] | list[InterleavedContentItem], + text_truncation: TextTruncation | None = TextTruncation.none, + output_dimension: int | None = None, + task_type: EmbeddingTaskType | None = None, ) -> EmbeddingsResponse: if any(content_has_media(content) for content in contents): raise NotImplementedError("Media is not supported") # - # Llama Stack: contents = List[str] | List[InterleavedContentItem] + # Llama Stack: contents = list[str] | list[InterleavedContentItem] # -> - # OpenAI: input = str | List[str] + # OpenAI: input = str | list[str] # - # we can ignore str and always pass List[str] to OpenAI + # we can ignore str and always pass list[str] to OpenAI # flat_contents = [content.text if isinstance(content, TextContentItem) else content for content in contents] input = [content.text if isinstance(content, TextContentItem) else content for content in flat_contents] - model = self.get_provider_model_id(model_id) + provider_model_id = await self._get_provider_model_id(model_id) extra_body = {} @@ -205,8 +224,8 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper): extra_body["input_type"] = task_type_options[task_type] try: - response = await self._get_client(model).embeddings.create( - model=model, + response = await self._get_client(provider_model_id).embeddings.create( + model=provider_model_id, input=input, extra_body=extra_body, ) @@ -214,25 +233,35 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper): raise ValueError(f"Failed to get embeddings: {e}") from e # - # OpenAI: CreateEmbeddingResponse(data=[Embedding(embedding=List[float], ...)], ...) + # OpenAI: CreateEmbeddingResponse(data=[Embedding(embedding=list[float], ...)], ...) 
# -> - # Llama Stack: EmbeddingsResponse(embeddings=List[List[float]]) + # Llama Stack: EmbeddingsResponse(embeddings=list[list[float]]) # return EmbeddingsResponse(embeddings=[embedding.embedding for embedding in response.data]) + async def openai_embeddings( + self, + model: str, + input: str | list[str], + encoding_format: str | None = "float", + dimensions: int | None = None, + user: str | None = None, + ) -> OpenAIEmbeddingsResponse: + raise NotImplementedError() + async def chat_completion( self, model_id: str, - messages: List[Message], - sampling_params: Optional[SamplingParams] = None, - response_format: Optional[ResponseFormat] = None, - tools: Optional[List[ToolDefinition]] = None, - tool_choice: Optional[ToolChoice] = ToolChoice.auto, - tool_prompt_format: Optional[ToolPromptFormat] = None, - stream: Optional[bool] = False, - logprobs: Optional[LogProbConfig] = None, - tool_config: Optional[ToolConfig] = None, - ) -> Union[ChatCompletionResponse, AsyncIterator[ChatCompletionResponseStreamChunk]]: + messages: list[Message], + sampling_params: SamplingParams | None = None, + response_format: ResponseFormat | None = None, + tools: list[ToolDefinition] | None = None, + tool_choice: ToolChoice | None = ToolChoice.auto, + tool_prompt_format: ToolPromptFormat | None = None, + stream: bool | None = False, + logprobs: LogProbConfig | None = None, + tool_config: ToolConfig | None = None, + ) -> ChatCompletionResponse | AsyncIterator[ChatCompletionResponseStreamChunk]: if sampling_params is None: sampling_params = SamplingParams() if tool_prompt_format: @@ -240,10 +269,10 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper): # await check_health(self._config) # this raises errors - provider_model_id = self.get_provider_model_id(model_id) + provider_model_id = await self._get_provider_model_id(model_id) request = await convert_chat_completion_request( request=ChatCompletionRequest( - model=self.get_provider_model_id(model_id), + model=provider_model_id, messages=messages, sampling_params=sampling_params, response_format=response_format, @@ -269,26 +298,26 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper): async def openai_completion( self, model: str, - prompt: Union[str, List[str], List[int], List[List[int]]], - best_of: Optional[int] = None, - echo: Optional[bool] = None, - frequency_penalty: Optional[float] = None, - logit_bias: Optional[Dict[str, float]] = None, - logprobs: Optional[bool] = None, - max_tokens: Optional[int] = None, - n: Optional[int] = None, - presence_penalty: Optional[float] = None, - seed: Optional[int] = None, - stop: Optional[Union[str, List[str]]] = None, - stream: Optional[bool] = None, - stream_options: Optional[Dict[str, Any]] = None, - temperature: Optional[float] = None, - top_p: Optional[float] = None, - user: Optional[str] = None, - guided_choice: Optional[List[str]] = None, - prompt_logprobs: Optional[int] = None, + prompt: str | list[str] | list[int] | list[list[int]], + best_of: int | None = None, + echo: bool | None = None, + frequency_penalty: float | None = None, + logit_bias: dict[str, float] | None = None, + logprobs: bool | None = None, + max_tokens: int | None = None, + n: int | None = None, + presence_penalty: float | None = None, + seed: int | None = None, + stop: str | list[str] | None = None, + stream: bool | None = None, + stream_options: dict[str, Any] | None = None, + temperature: float | None = None, + top_p: float | None = None, + user: str | None = None, + guided_choice: list[str] | None = None, + 
prompt_logprobs: int | None = None, ) -> OpenAICompletion: - provider_model_id = self.get_provider_model_id(model) + provider_model_id = await self._get_provider_model_id(model) params = await prepare_openai_completion_params( model=provider_model_id, @@ -318,30 +347,30 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper): async def openai_chat_completion( self, model: str, - messages: List[OpenAIMessageParam], - frequency_penalty: Optional[float] = None, - function_call: Optional[Union[str, Dict[str, Any]]] = None, - functions: Optional[List[Dict[str, Any]]] = None, - logit_bias: Optional[Dict[str, float]] = None, - logprobs: Optional[bool] = None, - max_completion_tokens: Optional[int] = None, - max_tokens: Optional[int] = None, - n: Optional[int] = None, - parallel_tool_calls: Optional[bool] = None, - presence_penalty: Optional[float] = None, - response_format: Optional[Dict[str, str]] = None, - seed: Optional[int] = None, - stop: Optional[Union[str, List[str]]] = None, - stream: Optional[bool] = None, - stream_options: Optional[Dict[str, Any]] = None, - temperature: Optional[float] = None, - tool_choice: Optional[Union[str, Dict[str, Any]]] = None, - tools: Optional[List[Dict[str, Any]]] = None, - top_logprobs: Optional[int] = None, - top_p: Optional[float] = None, - user: Optional[str] = None, - ) -> OpenAIChatCompletion: - provider_model_id = self.get_provider_model_id(model) + messages: list[OpenAIMessageParam], + frequency_penalty: float | None = None, + function_call: str | dict[str, Any] | None = None, + functions: list[dict[str, Any]] | None = None, + logit_bias: dict[str, float] | None = None, + logprobs: bool | None = None, + max_completion_tokens: int | None = None, + max_tokens: int | None = None, + n: int | None = None, + parallel_tool_calls: bool | None = None, + presence_penalty: float | None = None, + response_format: OpenAIResponseFormatParam | None = None, + seed: int | None = None, + stop: str | list[str] | None = None, + stream: bool | None = None, + stream_options: dict[str, Any] | None = None, + temperature: float | None = None, + tool_choice: str | dict[str, Any] | None = None, + tools: list[dict[str, Any]] | None = None, + top_logprobs: int | None = None, + top_p: float | None = None, + user: str | None = None, + ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]: + provider_model_id = await self._get_provider_model_id(model) params = await prepare_openai_completion_params( model=provider_model_id, @@ -373,3 +402,44 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper): return await self._get_client(provider_model_id).chat.completions.create(**params) except APIConnectionError as e: raise ConnectionError(f"Failed to connect to NVIDIA NIM at {self._config.url}: {e}") from e + + async def register_model(self, model: Model) -> Model: + """ + Allow non-llama model registration. + + Non-llama model registration: API Catalogue models, post-training models, etc. + client = LlamaStackAsLibraryClient("nvidia") + client.models.register( + model_id="mistralai/mixtral-8x7b-instruct-v0.1", + model_type=ModelType.llm, + provider_id="nvidia", + provider_model_id="mistralai/mixtral-8x7b-instruct-v0.1" + ) + + NOTE: Only supports models endpoints compatible with AsyncOpenAI base_url format. 
+ """ + if model.model_type == ModelType.embedding: + # embedding models are always registered by their provider model id and does not need to be mapped to a llama model + provider_resource_id = model.provider_resource_id + else: + provider_resource_id = self.get_provider_model_id(model.provider_resource_id) + + if provider_resource_id: + model.provider_resource_id = provider_resource_id + else: + llama_model = model.metadata.get("llama_model") + existing_llama_model = self.get_llama_model(model.provider_resource_id) + if existing_llama_model: + if existing_llama_model != llama_model: + raise ValueError( + f"Provider model id '{model.provider_resource_id}' is already registered to a different llama model: '{existing_llama_model}'" + ) + else: + # not llama model + if llama_model in ALL_HUGGINGFACE_REPOS_TO_MODEL_DESCRIPTOR: + self.provider_id_to_llama_model_map[model.provider_resource_id] = ( + ALL_HUGGINGFACE_REPOS_TO_MODEL_DESCRIPTOR[llama_model] + ) + else: + self.alias_to_provider_id_map[model.provider_model_id] = model.provider_model_id + return model diff --git a/llama_stack/providers/remote/inference/nvidia/openai_utils.py b/llama_stack/providers/remote/inference/nvidia/openai_utils.py index 3f2769b26..0b0d7fcf3 100644 --- a/llama_stack/providers/remote/inference/nvidia/openai_utils.py +++ b/llama_stack/providers/remote/inference/nvidia/openai_utils.py @@ -5,7 +5,8 @@ # the root directory of this source tree. import warnings -from typing import Any, AsyncGenerator, Dict, List, Optional +from collections.abc import AsyncGenerator +from typing import Any from openai import AsyncStream from openai.types.chat.chat_completion import ( @@ -64,7 +65,7 @@ async def convert_chat_completion_request( ) nvext = {} - payload: Dict[str, Any] = dict( + payload: dict[str, Any] = dict( model=request.model, messages=[await convert_message_to_openai_dict_new(message) for message in request.messages], stream=request.stream, @@ -137,7 +138,7 @@ def convert_completion_request( # logprobs.top_k -> logprobs nvext = {} - payload: Dict[str, Any] = dict( + payload: dict[str, Any] = dict( model=request.model, prompt=request.content, stream=request.stream, @@ -176,8 +177,8 @@ def convert_completion_request( def _convert_openai_completion_logprobs( - logprobs: Optional[OpenAICompletionLogprobs], -) -> Optional[List[TokenLogProbs]]: + logprobs: OpenAICompletionLogprobs | None, +) -> list[TokenLogProbs] | None: """ Convert an OpenAI CompletionLogprobs into a list of TokenLogProbs. """ diff --git a/llama_stack/providers/remote/inference/nvidia/utils.py b/llama_stack/providers/remote/inference/nvidia/utils.py index 7d3f3f27e..74019999e 100644 --- a/llama_stack/providers/remote/inference/nvidia/utils.py +++ b/llama_stack/providers/remote/inference/nvidia/utils.py @@ -5,7 +5,6 @@ # the root directory of this source tree. 
import logging -from typing import Tuple import httpx @@ -18,7 +17,7 @@ def _is_nvidia_hosted(config: NVIDIAConfig) -> bool: return "integrate.api.nvidia.com" in config.url -async def _get_health(url: str) -> Tuple[bool, bool]: +async def _get_health(url: str) -> tuple[bool, bool]: """ Query {url}/v1/health/{live,ready} to check if the server is running and ready diff --git a/llama_stack/providers/remote/inference/ollama/config.py b/llama_stack/providers/remote/inference/ollama/config.py index a5a4d48ab..0e4aef0e1 100644 --- a/llama_stack/providers/remote/inference/ollama/config.py +++ b/llama_stack/providers/remote/inference/ollama/config.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict +from typing import Any from pydantic import BaseModel @@ -15,5 +15,5 @@ class OllamaImplConfig(BaseModel): url: str = DEFAULT_OLLAMA_URL @classmethod - def sample_run_config(cls, url: str = "${env.OLLAMA_URL:http://localhost:11434}", **kwargs) -> Dict[str, Any]: + def sample_run_config(cls, url: str = "${env.OLLAMA_URL:http://localhost:11434}", **kwargs) -> dict[str, Any]: return {"url": url} diff --git a/llama_stack/providers/remote/inference/ollama/ollama.py b/llama_stack/providers/remote/inference/ollama/ollama.py index 33b48af46..8863e0edc 100644 --- a/llama_stack/providers/remote/inference/ollama/ollama.py +++ b/llama_stack/providers/remote/inference/ollama/ollama.py @@ -5,10 +5,11 @@ # the root directory of this source tree. -from typing import Any, AsyncGenerator, Dict, List, Optional, Union +from collections.abc import AsyncGenerator, AsyncIterator +from typing import Any import httpx -from ollama import AsyncClient +from ollama import AsyncClient # type: ignore[attr-defined] from openai import AsyncOpenAI from llama_stack.apis.common.content_types import ( @@ -27,10 +28,11 @@ from llama_stack.apis.inference import ( EmbeddingsResponse, EmbeddingTaskType, GrammarResponseFormat, - Inference, + InferenceProvider, JsonSchemaResponseFormat, LogProbConfig, Message, + OpenAIEmbeddingsResponse, ResponseFormat, SamplingParams, TextTruncation, @@ -39,10 +41,20 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.apis.inference.inference import OpenAIChatCompletion, OpenAICompletion, OpenAIMessageParam +from llama_stack.apis.inference.inference import ( + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, + OpenAIMessageParam, + OpenAIResponseFormatParam, +) from llama_stack.apis.models import Model, ModelType from llama_stack.log import get_logger -from llama_stack.providers.datatypes import ModelsProtocolPrivate +from llama_stack.providers.datatypes import ( + HealthResponse, + HealthStatus, + ModelsProtocolPrivate, +) from llama_stack.providers.utils.inference.model_registry import ( ModelRegistryHelper, ) @@ -50,6 +62,7 @@ from llama_stack.providers.utils.inference.openai_compat import ( OpenAICompatCompletionChoice, OpenAICompatCompletionResponse, get_sampling_options, + prepare_openai_completion_params, process_chat_completion_response, process_chat_completion_stream_response, process_completion_response, @@ -70,7 +83,7 @@ logger = get_logger(name=__name__, category="inference") class OllamaInferenceAdapter( - Inference, + InferenceProvider, ModelsProtocolPrivate, ): def __init__(self, url: str) -> None: @@ -87,8 +100,19 @@ class OllamaInferenceAdapter( async def initialize(self) -> None: logger.info(f"checking 
connectivity to Ollama at `{self.url}`...") + await self.health() + + async def health(self) -> HealthResponse: + """ + Performs a health check by verifying connectivity to the Ollama server. + This method is used by initialize() and the Provider API to verify that the service is running + correctly. + Returns: + HealthResponse: A dictionary containing the health status. + """ try: await self.client.ps() + return HealthResponse(status=HealthStatus.OK) except httpx.ConnectError as e: raise RuntimeError( "Ollama Server is not running, start it using `ollama serve` in a separate terminal" @@ -109,14 +133,16 @@ class OllamaInferenceAdapter( self, model_id: str, content: InterleavedContent, - sampling_params: Optional[SamplingParams] = None, - response_format: Optional[ResponseFormat] = None, - stream: Optional[bool] = False, - logprobs: Optional[LogProbConfig] = None, + sampling_params: SamplingParams | None = None, + response_format: ResponseFormat | None = None, + stream: bool | None = False, + logprobs: LogProbConfig | None = None, ) -> CompletionResponse | AsyncGenerator[CompletionResponseStreamChunk, None]: if sampling_params is None: sampling_params = SamplingParams() model = await self._get_model(model_id) + if model.provider_resource_id is None: + raise ValueError(f"Model {model_id} has no provider_resource_id set") request = CompletionRequest( model=model.provider_resource_id, content=content, @@ -167,19 +193,21 @@ class OllamaInferenceAdapter( async def chat_completion( self, model_id: str, - messages: List[Message], - sampling_params: Optional[SamplingParams] = None, - tools: Optional[List[ToolDefinition]] = None, - tool_choice: Optional[ToolChoice] = ToolChoice.auto, - tool_prompt_format: Optional[ToolPromptFormat] = None, - response_format: Optional[ResponseFormat] = None, - stream: Optional[bool] = False, - logprobs: Optional[LogProbConfig] = None, - tool_config: Optional[ToolConfig] = None, + messages: list[Message], + sampling_params: SamplingParams | None = None, + tools: list[ToolDefinition] | None = None, + tool_choice: ToolChoice | None = ToolChoice.auto, + tool_prompt_format: ToolPromptFormat | None = None, + response_format: ResponseFormat | None = None, + stream: bool | None = False, + logprobs: LogProbConfig | None = None, + tool_config: ToolConfig | None = None, ) -> ChatCompletionResponse | AsyncGenerator[ChatCompletionResponseStreamChunk, None]: if sampling_params is None: sampling_params = SamplingParams() model = await self._get_model(model_id) + if model.provider_resource_id is None: + raise ValueError(f"Model {model_id} has no provider_resource_id set") request = ChatCompletionRequest( model=model.provider_resource_id, messages=messages, @@ -195,7 +223,7 @@ class OllamaInferenceAdapter( else: return await self._nonstream_chat_completion(request) - async def _get_params(self, request: Union[ChatCompletionRequest, CompletionRequest]) -> dict: + async def _get_params(self, request: ChatCompletionRequest | CompletionRequest) -> dict: sampling_options = get_sampling_options(request.sampling_params) # This is needed since the Ollama API expects num_predict to be set # for early truncation instead of max_tokens. 
@@ -293,10 +321,10 @@ class OllamaInferenceAdapter( async def embeddings( self, model_id: str, - contents: List[str] | List[InterleavedContentItem], - text_truncation: Optional[TextTruncation] = TextTruncation.none, - output_dimension: Optional[int] = None, - task_type: Optional[EmbeddingTaskType] = None, + contents: list[str] | list[InterleavedContentItem], + text_truncation: TextTruncation | None = TextTruncation.none, + output_dimension: int | None = None, + task_type: EmbeddingTaskType | None = None, ) -> EmbeddingsResponse: model = await self._get_model(model_id) @@ -312,7 +340,10 @@ class OllamaInferenceAdapter( return EmbeddingsResponse(embeddings=embeddings) async def register_model(self, model: Model) -> Model: - model = await self.register_helper.register_model(model) + try: + model = await self.register_helper.register_model(model) + except ValueError: + pass # Ignore statically unknown model, will check live listing if model.model_type == ModelType.embedding: logger.info(f"Pulling embedding model `{model.provider_resource_id}` if necessary...") await self.client.pull(model.provider_resource_id) @@ -321,146 +352,160 @@ class OllamaInferenceAdapter( # - models not currently running are run by the ollama server as needed response = await self.client.list() available_models = [m["model"] for m in response["models"]] - if model.provider_resource_id not in available_models: + if model.provider_resource_id is None: + raise ValueError("Model provider_resource_id cannot be None") + provider_resource_id = self.register_helper.get_provider_model_id(model.provider_resource_id) + if provider_resource_id is None: + provider_resource_id = model.provider_resource_id + if provider_resource_id not in available_models: + available_models_latest = [m["model"].split(":latest")[0] for m in response["models"]] + if provider_resource_id in available_models_latest: + logger.warning( + f"Imprecise provider resource id was used but 'latest' is available in Ollama - using '{model.provider_resource_id}:latest'" + ) + return model raise ValueError( f"Model '{model.provider_resource_id}' is not available in Ollama. 
Available models: {', '.join(available_models)}" ) + model.provider_resource_id = provider_resource_id return model + async def openai_embeddings( + self, + model: str, + input: str | list[str], + encoding_format: str | None = "float", + dimensions: int | None = None, + user: str | None = None, + ) -> OpenAIEmbeddingsResponse: + raise NotImplementedError() + async def openai_completion( self, model: str, - prompt: Union[str, List[str], List[int], List[List[int]]], - best_of: Optional[int] = None, - echo: Optional[bool] = None, - frequency_penalty: Optional[float] = None, - logit_bias: Optional[Dict[str, float]] = None, - logprobs: Optional[bool] = None, - max_tokens: Optional[int] = None, - n: Optional[int] = None, - presence_penalty: Optional[float] = None, - seed: Optional[int] = None, - stop: Optional[Union[str, List[str]]] = None, - stream: Optional[bool] = None, - stream_options: Optional[Dict[str, Any]] = None, - temperature: Optional[float] = None, - top_p: Optional[float] = None, - user: Optional[str] = None, - guided_choice: Optional[List[str]] = None, - prompt_logprobs: Optional[int] = None, + prompt: str | list[str] | list[int] | list[list[int]], + best_of: int | None = None, + echo: bool | None = None, + frequency_penalty: float | None = None, + logit_bias: dict[str, float] | None = None, + logprobs: bool | None = None, + max_tokens: int | None = None, + n: int | None = None, + presence_penalty: float | None = None, + seed: int | None = None, + stop: str | list[str] | None = None, + stream: bool | None = None, + stream_options: dict[str, Any] | None = None, + temperature: float | None = None, + top_p: float | None = None, + user: str | None = None, + guided_choice: list[str] | None = None, + prompt_logprobs: int | None = None, ) -> OpenAICompletion: if not isinstance(prompt, str): raise ValueError("Ollama does not support non-string prompts for completion") model_obj = await self._get_model(model) - params = { - k: v - for k, v in { - "model": model_obj.provider_resource_id, - "prompt": prompt, - "best_of": best_of, - "echo": echo, - "frequency_penalty": frequency_penalty, - "logit_bias": logit_bias, - "logprobs": logprobs, - "max_tokens": max_tokens, - "n": n, - "presence_penalty": presence_penalty, - "seed": seed, - "stop": stop, - "stream": stream, - "stream_options": stream_options, - "temperature": temperature, - "top_p": top_p, - "user": user, - }.items() - if v is not None - } + params = await prepare_openai_completion_params( + model=model_obj.provider_resource_id, + prompt=prompt, + best_of=best_of, + echo=echo, + frequency_penalty=frequency_penalty, + logit_bias=logit_bias, + logprobs=logprobs, + max_tokens=max_tokens, + n=n, + presence_penalty=presence_penalty, + seed=seed, + stop=stop, + stream=stream, + stream_options=stream_options, + temperature=temperature, + top_p=top_p, + user=user, + ) return await self.openai_client.completions.create(**params) # type: ignore async def openai_chat_completion( self, model: str, - messages: List[OpenAIMessageParam], - frequency_penalty: Optional[float] = None, - function_call: Optional[Union[str, Dict[str, Any]]] = None, - functions: Optional[List[Dict[str, Any]]] = None, - logit_bias: Optional[Dict[str, float]] = None, - logprobs: Optional[bool] = None, - max_completion_tokens: Optional[int] = None, - max_tokens: Optional[int] = None, - n: Optional[int] = None, - parallel_tool_calls: Optional[bool] = None, - presence_penalty: Optional[float] = None, - response_format: Optional[Dict[str, str]] = None, - seed: Optional[int] = 
None, - stop: Optional[Union[str, List[str]]] = None, - stream: Optional[bool] = None, - stream_options: Optional[Dict[str, Any]] = None, - temperature: Optional[float] = None, - tool_choice: Optional[Union[str, Dict[str, Any]]] = None, - tools: Optional[List[Dict[str, Any]]] = None, - top_logprobs: Optional[int] = None, - top_p: Optional[float] = None, - user: Optional[str] = None, - ) -> OpenAIChatCompletion: + messages: list[OpenAIMessageParam], + frequency_penalty: float | None = None, + function_call: str | dict[str, Any] | None = None, + functions: list[dict[str, Any]] | None = None, + logit_bias: dict[str, float] | None = None, + logprobs: bool | None = None, + max_completion_tokens: int | None = None, + max_tokens: int | None = None, + n: int | None = None, + parallel_tool_calls: bool | None = None, + presence_penalty: float | None = None, + response_format: OpenAIResponseFormatParam | None = None, + seed: int | None = None, + stop: str | list[str] | None = None, + stream: bool | None = None, + stream_options: dict[str, Any] | None = None, + temperature: float | None = None, + tool_choice: str | dict[str, Any] | None = None, + tools: list[dict[str, Any]] | None = None, + top_logprobs: int | None = None, + top_p: float | None = None, + user: str | None = None, + ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]: model_obj = await self._get_model(model) - params = { - k: v - for k, v in { - "model": model_obj.provider_resource_id, - "messages": messages, - "frequency_penalty": frequency_penalty, - "function_call": function_call, - "functions": functions, - "logit_bias": logit_bias, - "logprobs": logprobs, - "max_completion_tokens": max_completion_tokens, - "max_tokens": max_tokens, - "n": n, - "parallel_tool_calls": parallel_tool_calls, - "presence_penalty": presence_penalty, - "response_format": response_format, - "seed": seed, - "stop": stop, - "stream": stream, - "stream_options": stream_options, - "temperature": temperature, - "tool_choice": tool_choice, - "tools": tools, - "top_logprobs": top_logprobs, - "top_p": top_p, - "user": user, - }.items() - if v is not None - } + params = await prepare_openai_completion_params( + model=model_obj.provider_resource_id, + messages=messages, + frequency_penalty=frequency_penalty, + function_call=function_call, + functions=functions, + logit_bias=logit_bias, + logprobs=logprobs, + max_completion_tokens=max_completion_tokens, + max_tokens=max_tokens, + n=n, + parallel_tool_calls=parallel_tool_calls, + presence_penalty=presence_penalty, + response_format=response_format, + seed=seed, + stop=stop, + stream=stream, + stream_options=stream_options, + temperature=temperature, + tool_choice=tool_choice, + tools=tools, + top_logprobs=top_logprobs, + top_p=top_p, + user=user, + ) return await self.openai_client.chat.completions.create(**params) # type: ignore async def batch_completion( self, model_id: str, - content_batch: List[InterleavedContent], - sampling_params: Optional[SamplingParams] = None, - response_format: Optional[ResponseFormat] = None, - logprobs: Optional[LogProbConfig] = None, + content_batch: list[InterleavedContent], + sampling_params: SamplingParams | None = None, + response_format: ResponseFormat | None = None, + logprobs: LogProbConfig | None = None, ): raise NotImplementedError("Batch completion is not supported for Ollama") async def batch_chat_completion( self, model_id: str, - messages_batch: List[List[Message]], - sampling_params: Optional[SamplingParams] = None, - tools: Optional[List[ToolDefinition]] = 
None, - tool_config: Optional[ToolConfig] = None, - response_format: Optional[ResponseFormat] = None, - logprobs: Optional[LogProbConfig] = None, + messages_batch: list[list[Message]], + sampling_params: SamplingParams | None = None, + tools: list[ToolDefinition] | None = None, + tool_config: ToolConfig | None = None, + response_format: ResponseFormat | None = None, + logprobs: LogProbConfig | None = None, ): raise NotImplementedError("Batch chat completion is not supported for Ollama") -async def convert_message_to_openai_dict_for_ollama(message: Message) -> List[dict]: +async def convert_message_to_openai_dict_for_ollama(message: Message) -> list[dict]: async def _convert_content(content) -> dict: if isinstance(content, ImageContentItem): return { diff --git a/llama_stack/providers/remote/inference/openai/__init__.py b/llama_stack/providers/remote/inference/openai/__init__.py index 000a03d33..c245dbe10 100644 --- a/llama_stack/providers/remote/inference/openai/__init__.py +++ b/llama_stack/providers/remote/inference/openai/__init__.py @@ -4,15 +4,13 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Optional - from pydantic import BaseModel from .config import OpenAIConfig class OpenAIProviderDataValidator(BaseModel): - openai_api_key: Optional[str] = None + openai_api_key: str | None = None async def get_adapter_impl(config: OpenAIConfig, _deps): diff --git a/llama_stack/providers/remote/inference/openai/config.py b/llama_stack/providers/remote/inference/openai/config.py index 2b0cc2c10..17fb98831 100644 --- a/llama_stack/providers/remote/inference/openai/config.py +++ b/llama_stack/providers/remote/inference/openai/config.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict, Optional +from typing import Any from pydantic import BaseModel, Field @@ -12,7 +12,7 @@ from llama_stack.schema_utils import json_schema_type class OpenAIProviderDataValidator(BaseModel): - openai_api_key: Optional[str] = Field( + openai_api_key: str | None = Field( default=None, description="API key for OpenAI models", ) @@ -20,13 +20,13 @@ class OpenAIProviderDataValidator(BaseModel): @json_schema_type class OpenAIConfig(BaseModel): - api_key: Optional[str] = Field( + api_key: str | None = Field( default=None, description="API key for OpenAI models", ) @classmethod - def sample_run_config(cls, api_key: str = "${env.OPENAI_API_KEY}", **kwargs) -> Dict[str, Any]: + def sample_run_config(cls, api_key: str = "${env.OPENAI_API_KEY}", **kwargs) -> dict[str, Any]: return { "api_key": api_key, } diff --git a/llama_stack/providers/remote/inference/openai/models.py b/llama_stack/providers/remote/inference/openai/models.py index 1737043a4..e029c456c 100644 --- a/llama_stack/providers/remote/inference/openai/models.py +++ b/llama_stack/providers/remote/inference/openai/models.py @@ -4,27 +4,60 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +from dataclasses import dataclass + from llama_stack.apis.models.models import ModelType from llama_stack.providers.utils.inference.model_registry import ( ProviderModelEntry, ) LLM_MODEL_IDS = [ + # the models w/ "openai/" prefix are the litellm specific model names. + # they should be deprecated in favor of the canonical openai model names. 
"openai/gpt-4o", "openai/gpt-4o-mini", "openai/chatgpt-4o-latest", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo", + "gpt-3.5-turbo-instruct", + "gpt-4", + "gpt-4-turbo", + "gpt-4o", + "gpt-4o-2024-08-06", + "gpt-4o-mini", + "gpt-4o-audio-preview", + "chatgpt-4o-latest", + "o1", + "o1-mini", + "o3-mini", + "o4-mini", ] +@dataclass +class EmbeddingModelInfo: + """Structured representation of embedding model information.""" + + embedding_dimension: int + context_length: int + + +EMBEDDING_MODEL_IDS: dict[str, EmbeddingModelInfo] = { + "openai/text-embedding-3-small": EmbeddingModelInfo(1536, 8192), + "openai/text-embedding-3-large": EmbeddingModelInfo(3072, 8192), + "text-embedding-3-small": EmbeddingModelInfo(1536, 8192), + "text-embedding-3-large": EmbeddingModelInfo(3072, 8192), +} + + MODEL_ENTRIES = [ProviderModelEntry(provider_model_id=m) for m in LLM_MODEL_IDS] + [ ProviderModelEntry( - provider_model_id="openai/text-embedding-3-small", + provider_model_id=model_id, model_type=ModelType.embedding, - metadata={"embedding_dimension": 1536, "context_length": 8192}, - ), - ProviderModelEntry( - provider_model_id="openai/text-embedding-3-large", - model_type=ModelType.embedding, - metadata={"embedding_dimension": 3072, "context_length": 8192}, - ), + metadata={ + "embedding_dimension": model_info.embedding_dimension, + "context_length": model_info.context_length, + }, + ) + for model_id, model_info in EMBEDDING_MODEL_IDS.items() ] diff --git a/llama_stack/providers/remote/inference/openai/openai.py b/llama_stack/providers/remote/inference/openai/openai.py index 6b9c02e6c..6f3a686a8 100644 --- a/llama_stack/providers/remote/inference/openai/openai.py +++ b/llama_stack/providers/remote/inference/openai/openai.py @@ -4,12 +4,45 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
+import logging +from collections.abc import AsyncIterator +from typing import Any + +from openai import AsyncOpenAI + +from llama_stack.apis.inference.inference import ( + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, + OpenAIEmbeddingData, + OpenAIEmbeddingsResponse, + OpenAIEmbeddingUsage, + OpenAIMessageParam, + OpenAIResponseFormatParam, +) from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin +from llama_stack.providers.utils.inference.openai_compat import prepare_openai_completion_params from .config import OpenAIConfig from .models import MODEL_ENTRIES +logger = logging.getLogger(__name__) + +# +# This OpenAI adapter implements Inference methods using two clients - +# +# | Inference Method | Implementation Source | +# |----------------------------|--------------------------| +# | completion | LiteLLMOpenAIMixin | +# | chat_completion | LiteLLMOpenAIMixin | +# | embedding | LiteLLMOpenAIMixin | +# | batch_completion | LiteLLMOpenAIMixin | +# | batch_chat_completion | LiteLLMOpenAIMixin | +# | openai_completion | AsyncOpenAI | +# | openai_chat_completion | AsyncOpenAI | +# | openai_embeddings | AsyncOpenAI | +# class OpenAIInferenceAdapter(LiteLLMOpenAIMixin): def __init__(self, config: OpenAIConfig) -> None: LiteLLMOpenAIMixin.__init__( @@ -19,9 +52,174 @@ class OpenAIInferenceAdapter(LiteLLMOpenAIMixin): provider_data_api_key_field="openai_api_key", ) self.config = config + # we set is_openai_compat so users can use the canonical + # openai model names like "gpt-4" or "gpt-3.5-turbo" + # and the model name will be translated to litellm's + # "openai/gpt-4" or "openai/gpt-3.5-turbo" transparently. + # if we do not set this, users will be exposed to the + # litellm specific model names, an abstraction leak. + self.is_openai_compat = True + self._openai_client = AsyncOpenAI( + api_key=self.config.api_key, + ) async def initialize(self) -> None: await super().initialize() async def shutdown(self) -> None: await super().shutdown() + + async def openai_completion( + self, + model: str, + prompt: str | list[str] | list[int] | list[list[int]], + best_of: int | None = None, + echo: bool | None = None, + frequency_penalty: float | None = None, + logit_bias: dict[str, float] | None = None, + logprobs: bool | None = None, + max_tokens: int | None = None, + n: int | None = None, + presence_penalty: float | None = None, + seed: int | None = None, + stop: str | list[str] | None = None, + stream: bool | None = None, + stream_options: dict[str, Any] | None = None, + temperature: float | None = None, + top_p: float | None = None, + user: str | None = None, + guided_choice: list[str] | None = None, + prompt_logprobs: int | None = None, + ) -> OpenAICompletion: + if guided_choice is not None: + logging.warning("guided_choice is not supported by the OpenAI API. Ignoring.") + if prompt_logprobs is not None: + logging.warning("prompt_logprobs is not supported by the OpenAI API. 
Ignoring.") + + model_id = (await self.model_store.get_model(model)).provider_resource_id + if model_id.startswith("openai/"): + model_id = model_id[len("openai/") :] + params = await prepare_openai_completion_params( + model=model_id, + prompt=prompt, + best_of=best_of, + echo=echo, + frequency_penalty=frequency_penalty, + logit_bias=logit_bias, + logprobs=logprobs, + max_tokens=max_tokens, + n=n, + presence_penalty=presence_penalty, + seed=seed, + stop=stop, + stream=stream, + stream_options=stream_options, + temperature=temperature, + top_p=top_p, + user=user, + ) + return await self._openai_client.completions.create(**params) + + async def openai_chat_completion( + self, + model: str, + messages: list[OpenAIMessageParam], + frequency_penalty: float | None = None, + function_call: str | dict[str, Any] | None = None, + functions: list[dict[str, Any]] | None = None, + logit_bias: dict[str, float] | None = None, + logprobs: bool | None = None, + max_completion_tokens: int | None = None, + max_tokens: int | None = None, + n: int | None = None, + parallel_tool_calls: bool | None = None, + presence_penalty: float | None = None, + response_format: OpenAIResponseFormatParam | None = None, + seed: int | None = None, + stop: str | list[str] | None = None, + stream: bool | None = None, + stream_options: dict[str, Any] | None = None, + temperature: float | None = None, + tool_choice: str | dict[str, Any] | None = None, + tools: list[dict[str, Any]] | None = None, + top_logprobs: int | None = None, + top_p: float | None = None, + user: str | None = None, + ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]: + model_id = (await self.model_store.get_model(model)).provider_resource_id + if model_id.startswith("openai/"): + model_id = model_id[len("openai/") :] + params = await prepare_openai_completion_params( + model=model_id, + messages=messages, + frequency_penalty=frequency_penalty, + function_call=function_call, + functions=functions, + logit_bias=logit_bias, + logprobs=logprobs, + max_completion_tokens=max_completion_tokens, + max_tokens=max_tokens, + n=n, + parallel_tool_calls=parallel_tool_calls, + presence_penalty=presence_penalty, + response_format=response_format, + seed=seed, + stop=stop, + stream=stream, + stream_options=stream_options, + temperature=temperature, + tool_choice=tool_choice, + tools=tools, + top_logprobs=top_logprobs, + top_p=top_p, + user=user, + ) + return await self._openai_client.chat.completions.create(**params) + + async def openai_embeddings( + self, + model: str, + input: str | list[str], + encoding_format: str | None = "float", + dimensions: int | None = None, + user: str | None = None, + ) -> OpenAIEmbeddingsResponse: + model_id = (await self.model_store.get_model(model)).provider_resource_id + if model_id.startswith("openai/"): + model_id = model_id[len("openai/") :] + + # Prepare parameters for OpenAI embeddings API + params = { + "model": model_id, + "input": input, + } + + if encoding_format is not None: + params["encoding_format"] = encoding_format + if dimensions is not None: + params["dimensions"] = dimensions + if user is not None: + params["user"] = user + + # Call OpenAI embeddings API + response = await self._openai_client.embeddings.create(**params) + + data = [] + for i, embedding_data in enumerate(response.data): + data.append( + OpenAIEmbeddingData( + embedding=embedding_data.embedding, + index=i, + ) + ) + + usage = OpenAIEmbeddingUsage( + prompt_tokens=response.usage.prompt_tokens, + total_tokens=response.usage.total_tokens, + ) + 
+ return OpenAIEmbeddingsResponse( + data=data, + model=response.model, + usage=usage, + ) diff --git a/llama_stack/providers/remote/inference/passthrough/config.py b/llama_stack/providers/remote/inference/passthrough/config.py index 46325e428..ce41495ce 100644 --- a/llama_stack/providers/remote/inference/passthrough/config.py +++ b/llama_stack/providers/remote/inference/passthrough/config.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict, Optional +from typing import Any from pydantic import BaseModel, Field, SecretStr @@ -18,13 +18,13 @@ class PassthroughImplConfig(BaseModel): description="The URL for the passthrough endpoint", ) - api_key: Optional[SecretStr] = Field( + api_key: SecretStr | None = Field( default=None, description="API Key for the passthrouth endpoint", ) @classmethod - def sample_run_config(cls, **kwargs) -> Dict[str, Any]: + def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { "url": "${env.PASSTHROUGH_URL}", "api_key": "${env.PASSTHROUGH_API_KEY}", diff --git a/llama_stack/providers/remote/inference/passthrough/passthrough.py b/llama_stack/providers/remote/inference/passthrough/passthrough.py index 0eb38c395..6cf4680e2 100644 --- a/llama_stack/providers/remote/inference/passthrough/passthrough.py +++ b/llama_stack/providers/remote/inference/passthrough/passthrough.py @@ -4,7 +4,8 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, AsyncGenerator, Dict, List, Optional, Union +from collections.abc import AsyncGenerator, AsyncIterator +from typing import Any from llama_stack_client import AsyncLlamaStackClient @@ -18,6 +19,7 @@ from llama_stack.apis.inference import ( Inference, LogProbConfig, Message, + OpenAIEmbeddingsResponse, ResponseFormat, SamplingParams, TextTruncation, @@ -26,7 +28,13 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.apis.inference.inference import OpenAIChatCompletion, OpenAICompletion, OpenAIMessageParam +from llama_stack.apis.inference.inference import ( + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, + OpenAIMessageParam, + OpenAIResponseFormatParam, +) from llama_stack.apis.models import Model from llama_stack.distribution.library_client import convert_pydantic_to_json_value, convert_to_pydantic from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper @@ -87,10 +95,10 @@ class PassthroughInferenceAdapter(Inference): self, model_id: str, content: InterleavedContent, - sampling_params: Optional[SamplingParams] = None, - response_format: Optional[ResponseFormat] = None, - stream: Optional[bool] = False, - logprobs: Optional[LogProbConfig] = None, + sampling_params: SamplingParams | None = None, + response_format: ResponseFormat | None = None, + stream: bool | None = False, + logprobs: LogProbConfig | None = None, ) -> AsyncGenerator: if sampling_params is None: sampling_params = SamplingParams() @@ -117,15 +125,15 @@ class PassthroughInferenceAdapter(Inference): async def chat_completion( self, model_id: str, - messages: List[Message], - sampling_params: Optional[SamplingParams] = None, - tools: Optional[List[ToolDefinition]] = None, - tool_choice: Optional[ToolChoice] = ToolChoice.auto, - tool_prompt_format: Optional[ToolPromptFormat] = None, - response_format: Optional[ResponseFormat] = None, - stream: Optional[bool] 
= False, - logprobs: Optional[LogProbConfig] = None, - tool_config: Optional[ToolConfig] = None, + messages: list[Message], + sampling_params: SamplingParams | None = None, + tools: list[ToolDefinition] | None = None, + tool_choice: ToolChoice | None = ToolChoice.auto, + tool_prompt_format: ToolPromptFormat | None = None, + response_format: ResponseFormat | None = None, + stream: bool | None = False, + logprobs: LogProbConfig | None = None, + tool_config: ToolConfig | None = None, ) -> AsyncGenerator: if sampling_params is None: sampling_params = SamplingParams() @@ -159,7 +167,7 @@ class PassthroughInferenceAdapter(Inference): else: return await self._nonstream_chat_completion(json_params) - async def _nonstream_chat_completion(self, json_params: Dict[str, Any]) -> ChatCompletionResponse: + async def _nonstream_chat_completion(self, json_params: dict[str, Any]) -> ChatCompletionResponse: client = self._get_client() response = await client.inference.chat_completion(**json_params) @@ -172,7 +180,7 @@ class PassthroughInferenceAdapter(Inference): logprobs=response.logprobs, ) - async def _stream_chat_completion(self, json_params: Dict[str, Any]) -> AsyncGenerator: + async def _stream_chat_completion(self, json_params: dict[str, Any]) -> AsyncGenerator: client = self._get_client() stream_response = await client.inference.chat_completion(**json_params) @@ -187,10 +195,10 @@ class PassthroughInferenceAdapter(Inference): async def embeddings( self, model_id: str, - contents: List[InterleavedContent], - text_truncation: Optional[TextTruncation] = TextTruncation.none, - output_dimension: Optional[int] = None, - task_type: Optional[EmbeddingTaskType] = None, + contents: list[InterleavedContent], + text_truncation: TextTruncation | None = TextTruncation.none, + output_dimension: int | None = None, + task_type: EmbeddingTaskType | None = None, ) -> EmbeddingsResponse: client = self._get_client() model = await self.model_store.get_model(model_id) @@ -203,27 +211,37 @@ class PassthroughInferenceAdapter(Inference): task_type=task_type, ) + async def openai_embeddings( + self, + model: str, + input: str | list[str], + encoding_format: str | None = "float", + dimensions: int | None = None, + user: str | None = None, + ) -> OpenAIEmbeddingsResponse: + raise NotImplementedError() + async def openai_completion( self, model: str, - prompt: Union[str, List[str], List[int], List[List[int]]], - best_of: Optional[int] = None, - echo: Optional[bool] = None, - frequency_penalty: Optional[float] = None, - logit_bias: Optional[Dict[str, float]] = None, - logprobs: Optional[bool] = None, - max_tokens: Optional[int] = None, - n: Optional[int] = None, - presence_penalty: Optional[float] = None, - seed: Optional[int] = None, - stop: Optional[Union[str, List[str]]] = None, - stream: Optional[bool] = None, - stream_options: Optional[Dict[str, Any]] = None, - temperature: Optional[float] = None, - top_p: Optional[float] = None, - user: Optional[str] = None, - guided_choice: Optional[List[str]] = None, - prompt_logprobs: Optional[int] = None, + prompt: str | list[str] | list[int] | list[list[int]], + best_of: int | None = None, + echo: bool | None = None, + frequency_penalty: float | None = None, + logit_bias: dict[str, float] | None = None, + logprobs: bool | None = None, + max_tokens: int | None = None, + n: int | None = None, + presence_penalty: float | None = None, + seed: int | None = None, + stop: str | list[str] | None = None, + stream: bool | None = None, + stream_options: dict[str, Any] | None = None, + temperature: 
float | None = None, + top_p: float | None = None, + user: str | None = None, + guided_choice: list[str] | None = None, + prompt_logprobs: int | None = None, ) -> OpenAICompletion: client = self._get_client() model_obj = await self.model_store.get_model(model) @@ -255,29 +273,29 @@ class PassthroughInferenceAdapter(Inference): async def openai_chat_completion( self, model: str, - messages: List[OpenAIMessageParam], - frequency_penalty: Optional[float] = None, - function_call: Optional[Union[str, Dict[str, Any]]] = None, - functions: Optional[List[Dict[str, Any]]] = None, - logit_bias: Optional[Dict[str, float]] = None, - logprobs: Optional[bool] = None, - max_completion_tokens: Optional[int] = None, - max_tokens: Optional[int] = None, - n: Optional[int] = None, - parallel_tool_calls: Optional[bool] = None, - presence_penalty: Optional[float] = None, - response_format: Optional[Dict[str, str]] = None, - seed: Optional[int] = None, - stop: Optional[Union[str, List[str]]] = None, - stream: Optional[bool] = None, - stream_options: Optional[Dict[str, Any]] = None, - temperature: Optional[float] = None, - tool_choice: Optional[Union[str, Dict[str, Any]]] = None, - tools: Optional[List[Dict[str, Any]]] = None, - top_logprobs: Optional[int] = None, - top_p: Optional[float] = None, - user: Optional[str] = None, - ) -> OpenAIChatCompletion: + messages: list[OpenAIMessageParam], + frequency_penalty: float | None = None, + function_call: str | dict[str, Any] | None = None, + functions: list[dict[str, Any]] | None = None, + logit_bias: dict[str, float] | None = None, + logprobs: bool | None = None, + max_completion_tokens: int | None = None, + max_tokens: int | None = None, + n: int | None = None, + parallel_tool_calls: bool | None = None, + presence_penalty: float | None = None, + response_format: OpenAIResponseFormatParam | None = None, + seed: int | None = None, + stop: str | list[str] | None = None, + stream: bool | None = None, + stream_options: dict[str, Any] | None = None, + temperature: float | None = None, + tool_choice: str | dict[str, Any] | None = None, + tools: list[dict[str, Any]] | None = None, + top_logprobs: int | None = None, + top_p: float | None = None, + user: str | None = None, + ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]: client = self._get_client() model_obj = await self.model_store.get_model(model) @@ -309,7 +327,7 @@ class PassthroughInferenceAdapter(Inference): return await client.inference.openai_chat_completion(**params) - def cast_value_to_json_dict(self, request_params: Dict[str, Any]) -> Dict[str, Any]: + def cast_value_to_json_dict(self, request_params: dict[str, Any]) -> dict[str, Any]: json_params = {} for key, value in request_params.items(): json_input = convert_pydantic_to_json_value(value) diff --git a/llama_stack/providers/remote/inference/runpod/config.py b/llama_stack/providers/remote/inference/runpod/config.py index 377a7fe6a..e3913dc35 100644 --- a/llama_stack/providers/remote/inference/runpod/config.py +++ b/llama_stack/providers/remote/inference/runpod/config.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
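The signature rewrites above all follow the same pattern: `typing.Optional`/`Union`/`List`/`Dict` become PEP 604 unions and built-in generics, which requires Python 3.10 or newer. A minimal sketch of the pattern; the function and parameter names below are illustrative, not part of the codebase:

```python
from typing import Any


# Hypothetical signature. Before the migration this would have been written with
# Optional[float], Union[str, List[str]] and Optional[Dict[str, Any]].
async def sample_completion(
    model: str,
    prompt: str | list[str],
    temperature: float | None = None,
    stream_options: dict[str, Any] | None = None,
) -> dict[str, Any]:
    return {
        "model": model,
        "prompt": prompt,
        "temperature": temperature,
        "stream_options": stream_options,
    }
```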
-from typing import Any, Dict, Optional +from typing import Any from pydantic import BaseModel, Field @@ -13,17 +13,17 @@ from llama_stack.schema_utils import json_schema_type @json_schema_type class RunpodImplConfig(BaseModel): - url: Optional[str] = Field( + url: str | None = Field( default=None, description="The URL for the Runpod model serving endpoint", ) - api_token: Optional[str] = Field( + api_token: str | None = Field( default=None, description="The API token", ) @classmethod - def sample_run_config(cls, **kwargs: Any) -> Dict[str, Any]: + def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]: return { "url": "${env.RUNPOD_URL:}", "api_token": "${env.RUNPOD_API_TOKEN:}", diff --git a/llama_stack/providers/remote/inference/runpod/runpod.py b/llama_stack/providers/remote/inference/runpod/runpod.py index 878460122..f8c98893e 100644 --- a/llama_stack/providers/remote/inference/runpod/runpod.py +++ b/llama_stack/providers/remote/inference/runpod/runpod.py @@ -3,17 +3,18 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import AsyncGenerator +from collections.abc import AsyncGenerator from openai import OpenAI from llama_stack.apis.inference import * # noqa: F403 +from llama_stack.apis.inference.inference import OpenAIEmbeddingsResponse # from llama_stack.providers.datatypes import ModelsProtocolPrivate from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper from llama_stack.providers.utils.inference.openai_compat import ( - OpenAIChatCompletionUnsupportedMixin, - OpenAICompletionUnsupportedMixin, + OpenAIChatCompletionToLlamaStackMixin, + OpenAICompletionToLlamaStackMixin, get_sampling_options, process_chat_completion_response, process_chat_completion_stream_response, @@ -43,8 +44,8 @@ RUNPOD_SUPPORTED_MODELS = { class RunpodInferenceAdapter( ModelRegistryHelper, Inference, - OpenAIChatCompletionUnsupportedMixin, - OpenAICompletionUnsupportedMixin, + OpenAIChatCompletionToLlamaStackMixin, + OpenAICompletionToLlamaStackMixin, ): def __init__(self, config: RunpodImplConfig) -> None: ModelRegistryHelper.__init__(self, stack_to_provider_models_map=RUNPOD_SUPPORTED_MODELS) @@ -134,3 +135,13 @@ class RunpodInferenceAdapter( task_type: Optional[EmbeddingTaskType] = None, ) -> EmbeddingsResponse: raise NotImplementedError() + + async def openai_embeddings( + self, + model: str, + input: str | list[str], + encoding_format: str | None = "float", + dimensions: int | None = None, + user: str | None = None, + ) -> OpenAIEmbeddingsResponse: + raise NotImplementedError() diff --git a/llama_stack/providers/remote/inference/sambanova/__init__.py b/llama_stack/providers/remote/inference/sambanova/__init__.py index 3e682e69c..a3a7b8fbd 100644 --- a/llama_stack/providers/remote/inference/sambanova/__init__.py +++ b/llama_stack/providers/remote/inference/sambanova/__init__.py @@ -4,16 +4,12 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
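In the RunPod `sample_run_config` above, the `${env.RUNPOD_URL:}` strings are environment-variable placeholders that the stack resolves when a distribution starts; the text after the colon appears to act as the default (empty here). A usage sketch, assuming the module path shown in the diff and a made-up endpoint URL and token:

```python
from llama_stack.providers.remote.inference.runpod.config import RunpodImplConfig

# Inspect the template config with its env placeholders.
print(RunpodImplConfig.sample_run_config())
# {'url': '${env.RUNPOD_URL:}', 'api_token': '${env.RUNPOD_API_TOKEN:}'}

# Constructing the config directly with concrete values is also valid.
cfg = RunpodImplConfig(url="https://example-runpod-endpoint/v1", api_token="rp_example_token")
print(cfg.url)
```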
-from pydantic import BaseModel +from llama_stack.apis.inference import Inference from .config import SambaNovaImplConfig -class SambaNovaProviderDataValidator(BaseModel): - sambanova_api_key: str - - -async def get_adapter_impl(config: SambaNovaImplConfig, _deps): +async def get_adapter_impl(config: SambaNovaImplConfig, _deps) -> Inference: from .sambanova import SambaNovaInferenceAdapter assert isinstance(config, SambaNovaImplConfig), f"Unexpected config type: {type(config)}" diff --git a/llama_stack/providers/remote/inference/sambanova/config.py b/llama_stack/providers/remote/inference/sambanova/config.py index a30c29b74..abbf9430f 100644 --- a/llama_stack/providers/remote/inference/sambanova/config.py +++ b/llama_stack/providers/remote/inference/sambanova/config.py @@ -4,27 +4,34 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict, Optional +from typing import Any -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, SecretStr from llama_stack.schema_utils import json_schema_type +class SambaNovaProviderDataValidator(BaseModel): + sambanova_api_key: str | None = Field( + default=None, + description="Sambanova Cloud API key", + ) + + @json_schema_type class SambaNovaImplConfig(BaseModel): url: str = Field( default="https://api.sambanova.ai/v1", description="The URL for the SambaNova AI server", ) - api_key: Optional[str] = Field( + api_key: SecretStr | None = Field( default=None, - description="The SambaNova.ai API Key", + description="The SambaNova cloud API Key", ) @classmethod - def sample_run_config(cls, **kwargs) -> Dict[str, Any]: + def sample_run_config(cls, api_key: str = "${env.SAMBANOVA_API_KEY}", **kwargs) -> dict[str, Any]: return { "url": "https://api.sambanova.ai/v1", - "api_key": "${env.SAMBANOVA_API_KEY}", + "api_key": api_key, } diff --git a/llama_stack/providers/remote/inference/sambanova/models.py b/llama_stack/providers/remote/inference/sambanova/models.py index 43041e94a..9954fa7a0 100644 --- a/llama_stack/providers/remote/inference/sambanova/models.py +++ b/llama_stack/providers/remote/inference/sambanova/models.py @@ -11,43 +11,43 @@ from llama_stack.providers.utils.inference.model_registry import ( MODEL_ENTRIES = [ build_hf_repo_model_entry( - "Meta-Llama-3.1-8B-Instruct", + "sambanova/Meta-Llama-3.1-8B-Instruct", CoreModelId.llama3_1_8b_instruct.value, ), build_hf_repo_model_entry( - "Meta-Llama-3.1-70B-Instruct", - CoreModelId.llama3_1_70b_instruct.value, - ), - build_hf_repo_model_entry( - "Meta-Llama-3.1-405B-Instruct", + "sambanova/Meta-Llama-3.1-405B-Instruct", CoreModelId.llama3_1_405b_instruct.value, ), build_hf_repo_model_entry( - "Meta-Llama-3.2-1B-Instruct", + "sambanova/Meta-Llama-3.2-1B-Instruct", CoreModelId.llama3_2_1b_instruct.value, ), build_hf_repo_model_entry( - "Meta-Llama-3.2-3B-Instruct", + "sambanova/Meta-Llama-3.2-3B-Instruct", CoreModelId.llama3_2_3b_instruct.value, ), build_hf_repo_model_entry( - "Meta-Llama-3.3-70B-Instruct", + "sambanova/Meta-Llama-3.3-70B-Instruct", CoreModelId.llama3_3_70b_instruct.value, ), build_hf_repo_model_entry( - "Llama-3.2-11B-Vision-Instruct", + "sambanova/Llama-3.2-11B-Vision-Instruct", CoreModelId.llama3_2_11b_vision_instruct.value, ), build_hf_repo_model_entry( - "Llama-3.2-90B-Vision-Instruct", + "sambanova/Llama-3.2-90B-Vision-Instruct", CoreModelId.llama3_2_90b_vision_instruct.value, ), build_hf_repo_model_entry( - "Meta-Llama-Guard-3-8B", - 
CoreModelId.llama_guard_3_8b.value, - ), - build_hf_repo_model_entry( - "Llama-4-Scout-17B-16E-Instruct", + "sambanova/Llama-4-Scout-17B-16E-Instruct", CoreModelId.llama4_scout_17b_16e_instruct.value, ), + build_hf_repo_model_entry( + "sambanova/Llama-4-Maverick-17B-128E-Instruct", + CoreModelId.llama4_maverick_17b_128e_instruct.value, + ), + build_hf_repo_model_entry( + "sambanova/Meta-Llama-Guard-3-8B", + CoreModelId.llama_guard_3_8b.value, + ), ] diff --git a/llama_stack/providers/remote/inference/sambanova/sambanova.py b/llama_stack/providers/remote/inference/sambanova/sambanova.py index c503657eb..20f863665 100644 --- a/llama_stack/providers/remote/inference/sambanova/sambanova.py +++ b/llama_stack/providers/remote/inference/sambanova/sambanova.py @@ -5,305 +5,249 @@ # the root directory of this source tree. import json -from typing import AsyncGenerator, List, Optional +from collections.abc import Iterable -from openai import OpenAI +from openai.types.chat import ( + ChatCompletionAssistantMessageParam as OpenAIChatCompletionAssistantMessage, +) +from openai.types.chat import ( + ChatCompletionContentPartImageParam as OpenAIChatCompletionContentPartImageParam, +) +from openai.types.chat import ( + ChatCompletionContentPartParam as OpenAIChatCompletionContentPartParam, +) +from openai.types.chat import ( + ChatCompletionContentPartTextParam as OpenAIChatCompletionContentPartTextParam, +) +from openai.types.chat import ( + ChatCompletionMessageParam as OpenAIChatCompletionMessage, +) +from openai.types.chat import ( + ChatCompletionMessageToolCallParam as OpenAIChatCompletionMessageToolCall, +) +from openai.types.chat import ( + ChatCompletionSystemMessageParam as OpenAIChatCompletionSystemMessage, +) +from openai.types.chat import ( + ChatCompletionToolMessageParam as OpenAIChatCompletionToolMessage, +) +from openai.types.chat import ( + ChatCompletionUserMessageParam as OpenAIChatCompletionUserMessage, +) +from openai.types.chat.chat_completion_content_part_image_param import ( + ImageURL as OpenAIImageURL, +) +from openai.types.chat.chat_completion_message_tool_call_param import ( + Function as OpenAIFunction, +) from llama_stack.apis.common.content_types import ( ImageContentItem, InterleavedContent, - InterleavedContentItem, TextContentItem, ) from llama_stack.apis.inference import ( ChatCompletionRequest, - ChatCompletionResponse, CompletionMessage, - EmbeddingsResponse, - EmbeddingTaskType, - GreedySamplingStrategy, - Inference, - LogProbConfig, + JsonSchemaResponseFormat, Message, - ResponseFormat, - SamplingParams, - StopReason, SystemMessage, - TextTruncation, - ToolCall, ToolChoice, - ToolConfig, - ToolDefinition, - ToolPromptFormat, ToolResponseMessage, - TopKSamplingStrategy, - TopPSamplingStrategy, UserMessage, ) -from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper +from llama_stack.log import get_logger +from llama_stack.models.llama.datatypes import BuiltinTool +from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin from llama_stack.providers.utils.inference.openai_compat import ( - OpenAIChatCompletionUnsupportedMixin, - OpenAICompletionUnsupportedMixin, - process_chat_completion_stream_response, -) -from llama_stack.providers.utils.inference.prompt_adapter import ( - convert_image_content_to_url, + convert_tooldef_to_openai_tool, + get_sampling_options, ) +from llama_stack.providers.utils.inference.prompt_adapter import convert_image_content_to_url from .config import SambaNovaImplConfig from .models 
import MODEL_ENTRIES +logger = get_logger(name=__name__, category="inference") -class SambaNovaInferenceAdapter( - ModelRegistryHelper, - Inference, - OpenAIChatCompletionUnsupportedMixin, - OpenAICompletionUnsupportedMixin, -): - def __init__(self, config: SambaNovaImplConfig) -> None: - ModelRegistryHelper.__init__(self, model_entries=MODEL_ENTRIES) - self.config = config - async def initialize(self) -> None: - return +async def convert_message_to_openai_dict_with_b64_images( + message: Message | dict, +) -> OpenAIChatCompletionMessage: + """ + Convert a Message to an OpenAI API-compatible dictionary. + """ + # users can supply a dict instead of a Message object, we'll + # convert it to a Message object and proceed with some type safety. + if isinstance(message, dict): + if "role" not in message: + raise ValueError("role is required in message") + if message["role"] == "user": + message = UserMessage(**message) + elif message["role"] == "assistant": + message = CompletionMessage(**message) + elif message["role"] == "tool": + message = ToolResponseMessage(**message) + elif message["role"] == "system": + message = SystemMessage(**message) + else: + raise ValueError(f"Unsupported message role: {message['role']}") - async def shutdown(self) -> None: - pass - - def _get_client(self) -> OpenAI: - return OpenAI(base_url=self.config.url, api_key=self.config.api_key) - - async def completion( - self, - model_id: str, + # Map Llama Stack spec to OpenAI spec - + # str -> str + # {"type": "text", "text": ...} -> {"type": "text", "text": ...} + # {"type": "image", "image": {"url": {"uri": ...}}} -> {"type": "image_url", "image_url": {"url": ...}} + # {"type": "image", "image": {"data": ...}} -> {"type": "image_url", "image_url": {"url": "data:image/?;base64,..."}} + # List[...] -> List[...] 
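To make the mapping sketched in the comment above concrete, here is an illustrative input/output pair written as plain dicts rather than the pydantic/OpenAI types; the base64 payload is elided:

```python
# Llama Stack style user message with mixed text/image content...
llama_stack_message = {
    "role": "user",
    "content": [
        {"type": "text", "text": "What is in this image?"},
        {"type": "image", "image": {"data": "<raw image bytes>"}},
    ],
}

# ...and the OpenAI-compatible shape the helper produces: text parts pass through,
# image items become image_url parts carrying a base64 data URL.
openai_message = {
    "role": "user",
    "content": [
        {"type": "text", "text": "What is in this image?"},
        {"type": "image_url", "image_url": {"url": "data:image/png;base64,<...>"}},
    ],
}
```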
+ async def _convert_message_content( content: InterleavedContent, - sampling_params: Optional[SamplingParams] = None, - response_format: Optional[ResponseFormat] = None, - stream: Optional[bool] = False, - logprobs: Optional[LogProbConfig] = None, - ) -> AsyncGenerator: - raise NotImplementedError() - - async def chat_completion( - self, - model_id: str, - messages: List[Message], - sampling_params: Optional[SamplingParams] = None, - response_format: Optional[ResponseFormat] = None, - tools: Optional[List[ToolDefinition]] = None, - tool_choice: Optional[ToolChoice] = ToolChoice.auto, - tool_prompt_format: Optional[ToolPromptFormat] = ToolPromptFormat.json, - stream: Optional[bool] = False, - tool_config: Optional[ToolConfig] = None, - logprobs: Optional[LogProbConfig] = None, - ) -> AsyncGenerator: - if sampling_params is None: - sampling_params = SamplingParams() - model = await self.model_store.get_model(model_id) - - request = ChatCompletionRequest( - model=model.provider_resource_id, - messages=messages, - sampling_params=sampling_params, - tools=tools or [], - stream=stream, - logprobs=logprobs, - tool_config=tool_config, - ) - request_sambanova = await self.convert_chat_completion_request(request) - - if stream: - return self._stream_chat_completion(request_sambanova) - else: - return await self._nonstream_chat_completion(request_sambanova) - - async def _nonstream_chat_completion(self, request: ChatCompletionRequest) -> ChatCompletionResponse: - response = self._get_client().chat.completions.create(**request) - - choice = response.choices[0] - - result = ChatCompletionResponse( - completion_message=CompletionMessage( - content=choice.message.content or "", - stop_reason=self.convert_to_sambanova_finish_reason(choice.finish_reason), - tool_calls=self.convert_to_sambanova_tool_calls(choice.message.tool_calls), - ), - logprobs=None, - ) - - return result - - async def _stream_chat_completion(self, request: ChatCompletionRequest) -> AsyncGenerator: - async def _to_async_generator(): - streaming = self._get_client().chat.completions.create(**request) - for chunk in streaming: - yield chunk - - stream = _to_async_generator() - async for chunk in process_chat_completion_stream_response(stream, request): - yield chunk - - async def embeddings( - self, - model_id: str, - contents: List[str] | List[InterleavedContentItem], - text_truncation: Optional[TextTruncation] = TextTruncation.none, - output_dimension: Optional[int] = None, - task_type: Optional[EmbeddingTaskType] = None, - ) -> EmbeddingsResponse: - raise NotImplementedError() - - async def convert_chat_completion_request(self, request: ChatCompletionRequest) -> dict: - compatible_request = self.convert_sampling_params(request.sampling_params) - compatible_request["model"] = request.model - compatible_request["messages"] = await self.convert_to_sambanova_messages(request.messages) - compatible_request["stream"] = request.stream - compatible_request["logprobs"] = False - compatible_request["extra_headers"] = { - b"User-Agent": b"llama-stack: sambanova-inference-adapter", - } - compatible_request["tools"] = self.convert_to_sambanova_tool(request.tools) - return compatible_request - - def convert_sampling_params(self, sampling_params: SamplingParams, legacy: bool = False) -> dict: - params = {} - - if sampling_params: - params["frequency_penalty"] = sampling_params.repetition_penalty - - if sampling_params.max_tokens: - if legacy: - params["max_tokens"] = sampling_params.max_tokens - else: - params["max_completion_tokens"] = 
sampling_params.max_tokens - - if isinstance(sampling_params.strategy, TopPSamplingStrategy): - params["top_p"] = sampling_params.strategy.top_p - if isinstance(sampling_params.strategy, TopKSamplingStrategy): - params["extra_body"]["top_k"] = sampling_params.strategy.top_k - if isinstance(sampling_params.strategy, GreedySamplingStrategy): - params["temperature"] = 0.0 - - return params - - async def convert_to_sambanova_messages(self, messages: List[Message]) -> List[dict]: - conversation = [] - for message in messages: - content = {} - - content["content"] = await self.convert_to_sambanova_content(message) - - if isinstance(message, UserMessage): - content["role"] = "user" - elif isinstance(message, CompletionMessage): - content["role"] = "assistant" - tools = [] - for tool_call in message.tool_calls: - tools.append( - { - "id": tool_call.call_id, - "function": { - "name": tool_call.name, - "arguments": json.dumps(tool_call.arguments), - }, - "type": "function", - } - ) - content["tool_calls"] = tools - elif isinstance(message, ToolResponseMessage): - content["role"] = "tool" - content["tool_call_id"] = message.call_id - elif isinstance(message, SystemMessage): - content["role"] = "system" - - conversation.append(content) - - return conversation - - async def convert_to_sambanova_content(self, message: Message) -> dict: - async def _convert_content(content) -> dict: - if isinstance(content, ImageContentItem): - url = await convert_image_content_to_url(content, download=True) - # A fix to make sure the call sucess. - components = url.split(";base64") - url = f"{components[0].lower()};base64{components[1]}" - return { - "type": "image_url", - "image_url": {"url": url}, - } + ) -> str | Iterable[OpenAIChatCompletionContentPartParam]: + async def impl( + content_: InterleavedContent, + ) -> str | OpenAIChatCompletionContentPartParam | list[OpenAIChatCompletionContentPartParam]: + # Llama Stack and OpenAI spec match for str and text input + if isinstance(content_, str): + return content_ + elif isinstance(content_, TextContentItem): + return OpenAIChatCompletionContentPartTextParam( + type="text", + text=content_.text, + ) + elif isinstance(content_, ImageContentItem): + return OpenAIChatCompletionContentPartImageParam( + type="image_url", + image_url=OpenAIImageURL(url=await convert_image_content_to_url(content_, download=True)), + ) + elif isinstance(content_, list): + return [await impl(item) for item in content_] else: - text = content.text if isinstance(content, TextContentItem) else content - assert isinstance(text, str) - return {"type": "text", "text": text} + raise ValueError(f"Unsupported content type: {type(content_)}") - if isinstance(message.content, list): - # If it is a list, the text content should be wrapped in dict - content = [await _convert_content(c) for c in message.content] + ret = await impl(content) + + # OpenAI*Message expects a str or list + if isinstance(ret, str) or isinstance(ret, list): + return ret else: - content = message.content + return [ret] - return content + out: OpenAIChatCompletionMessage = None + if isinstance(message, UserMessage): + out = OpenAIChatCompletionUserMessage( + role="user", + content=await _convert_message_content(message.content), + ) + elif isinstance(message, CompletionMessage): + out = OpenAIChatCompletionAssistantMessage( + role="assistant", + content=await _convert_message_content(message.content), + tool_calls=[ + OpenAIChatCompletionMessageToolCall( + id=tool.call_id, + function=OpenAIFunction( + name=tool.tool_name if not 
isinstance(tool.tool_name, BuiltinTool) else tool.tool_name.value, + arguments=json.dumps(tool.arguments), + ), + type="function", + ) + for tool in message.tool_calls + ] + or None, + ) + elif isinstance(message, ToolResponseMessage): + out = OpenAIChatCompletionToolMessage( + role="tool", + tool_call_id=message.call_id, + content=await _convert_message_content(message.content), + ) + elif isinstance(message, SystemMessage): + out = OpenAIChatCompletionSystemMessage( + role="system", + content=await _convert_message_content(message.content), + ) + else: + raise ValueError(f"Unsupported message type: {type(message)}") - def convert_to_sambanova_tool(self, tools: List[ToolDefinition]) -> List[dict]: - if tools is None: - return tools + return out - compatiable_tools = [] - for tool in tools: - properties = {} - compatiable_required = [] - if tool.parameters: - for tool_key, tool_param in tool.parameters.items(): - properties[tool_key] = {"type": tool_param.param_type} - if tool_param.description: - properties[tool_key]["description"] = tool_param.description - if tool_param.default: - properties[tool_key]["default"] = tool_param.default - if tool_param.required: - compatiable_required.append(tool_key) +class SambaNovaInferenceAdapter(LiteLLMOpenAIMixin): + _config: SambaNovaImplConfig - compatiable_tool = { - "type": "function", - "function": { - "name": tool.tool_name, - "description": tool.description, - "parameters": { - "type": "object", - "properties": properties, - "required": compatiable_required, - }, + def __init__(self, config: SambaNovaImplConfig): + self.config = config + LiteLLMOpenAIMixin.__init__( + self, + model_entries=MODEL_ENTRIES, + api_key_from_config=self.config.api_key, + provider_data_api_key_field="sambanova_api_key", + ) + + def _get_api_key(self) -> str: + config_api_key = self.config.api_key if self.config.api_key else None + if config_api_key: + return config_api_key.get_secret_value() + else: + provider_data = self.get_request_provider_data() + if provider_data is None or not provider_data.sambanova_api_key: + raise ValueError( + 'Pass Sambanova API Key in the header X-LlamaStack-Provider-Data as { "sambanova_api_key": }' + ) + return provider_data.sambanova_api_key + + async def _get_params(self, request: ChatCompletionRequest) -> dict: + input_dict = {} + + input_dict["messages"] = [await convert_message_to_openai_dict_with_b64_images(m) for m in request.messages] + if fmt := request.response_format: + if not isinstance(fmt, JsonSchemaResponseFormat): + raise ValueError( + f"Unsupported response format: {type(fmt)}. Only JsonSchemaResponseFormat is supported." 
+ ) + + fmt = fmt.json_schema + name = fmt["title"] + del fmt["title"] + fmt["additionalProperties"] = False + + # Apply additionalProperties: False recursively to all objects + fmt = self._add_additional_properties_recursive(fmt) + + input_dict["response_format"] = { + "type": "json_schema", + "json_schema": { + "name": name, + "schema": fmt, + "strict": False, }, } + if request.tools: + input_dict["tools"] = [convert_tooldef_to_openai_tool(tool) for tool in request.tools] + if request.tool_config.tool_choice: + input_dict["tool_choice"] = ( + request.tool_config.tool_choice.value + if isinstance(request.tool_config.tool_choice, ToolChoice) + else request.tool_config.tool_choice + ) - compatiable_tools.append(compatiable_tool) + provider_data = self.get_request_provider_data() + key_field = self.provider_data_api_key_field + if provider_data and getattr(provider_data, key_field, None): + api_key = getattr(provider_data, key_field) + else: + api_key = self._get_api_key() - if len(compatiable_tools) > 0: - return compatiable_tools - return None - - def convert_to_sambanova_finish_reason(self, finish_reason: str) -> StopReason: return { - "stop": StopReason.end_of_turn, - "length": StopReason.out_of_tokens, - "tool_calls": StopReason.end_of_message, - }.get(finish_reason, StopReason.end_of_turn) + "model": request.model, + "api_key": api_key, + "api_base": self.config.url, + **input_dict, + "stream": request.stream, + **get_sampling_options(request.sampling_params), + } - def convert_to_sambanova_tool_calls( - self, - tool_calls, - ) -> List[ToolCall]: - if not tool_calls: - return [] + async def initialize(self): + await super().initialize() - compitable_tool_calls = [ - ToolCall( - call_id=call.id, - tool_name=call.function.name, - arguments=json.loads(call.function.arguments), - arguments_json=call.function.arguments, - ) - for call in tool_calls - ] - - return compitable_tool_calls + async def shutdown(self): + await super().shutdown() diff --git a/llama_stack/providers/remote/inference/sambanova_openai_compat/__init__.py b/llama_stack/providers/remote/inference/sambanova_openai_compat/__init__.py index e31a3364c..60afe91ca 100644 --- a/llama_stack/providers/remote/inference/sambanova_openai_compat/__init__.py +++ b/llama_stack/providers/remote/inference/sambanova_openai_compat/__init__.py @@ -4,12 +4,12 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.inference import Inference +from llama_stack.apis.inference import InferenceProvider from .config import SambaNovaCompatConfig -async def get_adapter_impl(config: SambaNovaCompatConfig, _deps) -> Inference: +async def get_adapter_impl(config: SambaNovaCompatConfig, _deps) -> InferenceProvider: # import dynamically so the import is used only when it is needed from .sambanova import SambaNovaCompatInferenceAdapter diff --git a/llama_stack/providers/remote/inference/sambanova_openai_compat/config.py b/llama_stack/providers/remote/inference/sambanova_openai_compat/config.py index b792cb6e7..072fa85d1 100644 --- a/llama_stack/providers/remote/inference/sambanova_openai_compat/config.py +++ b/llama_stack/providers/remote/inference/sambanova_openai_compat/config.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
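As the error message in `_get_api_key` above spells out, callers that do not configure `api_key` can supply the key per request through the `X-LlamaStack-Provider-Data` header. A hedged sketch using plain `httpx`; the server address, endpoint path, and body shape are assumptions about a locally running stack, not an exact client API:

```python
import json

import httpx

# Hypothetical local Llama Stack server; adjust host, port, and endpoint to your deployment.
headers = {"X-LlamaStack-Provider-Data": json.dumps({"sambanova_api_key": "<your-sambanova-key>"})}

response = httpx.post(
    "http://localhost:8321/v1/inference/chat-completion",
    headers=headers,
    json={
        "model_id": "sambanova/Meta-Llama-3.1-8B-Instruct",
        "messages": [{"role": "user", "content": "Hello!"}],
    },
)
print(response.json())
```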
-from typing import Any, Dict, Optional +from typing import Any from pydantic import BaseModel, Field @@ -12,7 +12,7 @@ from llama_stack.schema_utils import json_schema_type class SambaNovaProviderDataValidator(BaseModel): - sambanova_api_key: Optional[str] = Field( + sambanova_api_key: str | None = Field( default=None, description="API key for SambaNova models", ) @@ -20,7 +20,7 @@ class SambaNovaProviderDataValidator(BaseModel): @json_schema_type class SambaNovaCompatConfig(BaseModel): - api_key: Optional[str] = Field( + api_key: str | None = Field( default=None, description="The SambaNova API key", ) @@ -31,7 +31,7 @@ class SambaNovaCompatConfig(BaseModel): ) @classmethod - def sample_run_config(cls, api_key: str = "${env.SAMBANOVA_API_KEY}", **kwargs) -> Dict[str, Any]: + def sample_run_config(cls, api_key: str = "${env.SAMBANOVA_API_KEY}", **kwargs) -> dict[str, Any]: return { "openai_compat_api_base": "https://api.sambanova.ai/v1", "api_key": api_key, diff --git a/llama_stack/providers/remote/inference/tgi/__init__.py b/llama_stack/providers/remote/inference/tgi/__init__.py index 834e51324..51614f1a6 100644 --- a/llama_stack/providers/remote/inference/tgi/__init__.py +++ b/llama_stack/providers/remote/inference/tgi/__init__.py @@ -4,13 +4,11 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Union - from .config import InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImplConfig async def get_adapter_impl( - config: Union[InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImplConfig], + config: InferenceAPIImplConfig | InferenceEndpointImplConfig | TGIImplConfig, _deps, ): from .tgi import InferenceAPIAdapter, InferenceEndpointAdapter, TGIAdapter diff --git a/llama_stack/providers/remote/inference/tgi/config.py b/llama_stack/providers/remote/inference/tgi/config.py index 6ad663662..3d632c9d8 100644 --- a/llama_stack/providers/remote/inference/tgi/config.py +++ b/llama_stack/providers/remote/inference/tgi/config.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Optional from pydantic import BaseModel, Field, SecretStr @@ -29,7 +28,7 @@ class InferenceEndpointImplConfig(BaseModel): endpoint_name: str = Field( description="The name of the Hugging Face Inference Endpoint in the format of '{namespace}/{endpoint_name}' (e.g. 'my-cool-org/meta-llama-3-1-8b-instruct-rce'). Namespace is optional and will default to the user account if not provided.", ) - api_token: Optional[SecretStr] = Field( + api_token: SecretStr | None = Field( default=None, description="Your Hugging Face user access token (will default to locally saved token if not provided)", ) @@ -52,7 +51,7 @@ class InferenceAPIImplConfig(BaseModel): huggingface_repo: str = Field( description="The model ID of the model on the Hugging Face Hub (e.g. 
'meta-llama/Meta-Llama-3.1-70B-Instruct')", ) - api_token: Optional[SecretStr] = Field( + api_token: SecretStr | None = Field( default=None, description="Your Hugging Face user access token (will default to locally saved token if not provided)", ) diff --git a/llama_stack/providers/remote/inference/tgi/tgi.py b/llama_stack/providers/remote/inference/tgi/tgi.py index 8f5b5e3cc..292d74ef8 100644 --- a/llama_stack/providers/remote/inference/tgi/tgi.py +++ b/llama_stack/providers/remote/inference/tgi/tgi.py @@ -6,7 +6,7 @@ import logging -from typing import AsyncGenerator, List, Optional +from collections.abc import AsyncGenerator from huggingface_hub import AsyncInferenceClient, HfApi @@ -23,6 +23,7 @@ from llama_stack.apis.inference import ( Inference, LogProbConfig, Message, + OpenAIEmbeddingsResponse, ResponseFormat, ResponseFormatType, SamplingParams, @@ -40,10 +41,10 @@ from llama_stack.providers.utils.inference.model_registry import ( build_hf_repo_model_entry, ) from llama_stack.providers.utils.inference.openai_compat import ( - OpenAIChatCompletionUnsupportedMixin, + OpenAIChatCompletionToLlamaStackMixin, OpenAICompatCompletionChoice, OpenAICompatCompletionResponse, - OpenAICompletionUnsupportedMixin, + OpenAICompletionToLlamaStackMixin, get_sampling_options, process_chat_completion_response, process_chat_completion_stream_response, @@ -73,8 +74,8 @@ def build_hf_repo_model_entries(): class _HfAdapter( Inference, - OpenAIChatCompletionUnsupportedMixin, - OpenAICompletionUnsupportedMixin, + OpenAIChatCompletionToLlamaStackMixin, + OpenAICompletionToLlamaStackMixin, ModelsProtocolPrivate, ): client: AsyncInferenceClient @@ -105,10 +106,10 @@ class _HfAdapter( self, model_id: str, content: InterleavedContent, - sampling_params: Optional[SamplingParams] = None, - response_format: Optional[ResponseFormat] = None, - stream: Optional[bool] = False, - logprobs: Optional[LogProbConfig] = None, + sampling_params: SamplingParams | None = None, + response_format: ResponseFormat | None = None, + stream: bool | None = False, + logprobs: LogProbConfig | None = None, ) -> AsyncGenerator: if sampling_params is None: sampling_params = SamplingParams() @@ -134,7 +135,7 @@ class _HfAdapter( def _build_options( self, - sampling_params: Optional[SamplingParams] = None, + sampling_params: SamplingParams | None = None, fmt: ResponseFormat = None, ): options = get_sampling_options(sampling_params) @@ -209,15 +210,15 @@ class _HfAdapter( async def chat_completion( self, model_id: str, - messages: List[Message], - sampling_params: Optional[SamplingParams] = None, - tools: Optional[List[ToolDefinition]] = None, - tool_choice: Optional[ToolChoice] = ToolChoice.auto, - tool_prompt_format: Optional[ToolPromptFormat] = None, - response_format: Optional[ResponseFormat] = None, - stream: Optional[bool] = False, - logprobs: Optional[LogProbConfig] = None, - tool_config: Optional[ToolConfig] = None, + messages: list[Message], + sampling_params: SamplingParams | None = None, + tools: list[ToolDefinition] | None = None, + tool_choice: ToolChoice | None = ToolChoice.auto, + tool_prompt_format: ToolPromptFormat | None = None, + response_format: ResponseFormat | None = None, + stream: bool | None = False, + logprobs: LogProbConfig | None = None, + tool_config: ToolConfig | None = None, ) -> AsyncGenerator: if sampling_params is None: sampling_params = SamplingParams() @@ -284,13 +285,23 @@ class _HfAdapter( async def embeddings( self, model_id: str, - contents: List[str] | List[InterleavedContentItem], - text_truncation: 
Optional[TextTruncation] = TextTruncation.none, - output_dimension: Optional[int] = None, - task_type: Optional[EmbeddingTaskType] = None, + contents: list[str] | list[InterleavedContentItem], + text_truncation: TextTruncation | None = TextTruncation.none, + output_dimension: int | None = None, + task_type: EmbeddingTaskType | None = None, ) -> EmbeddingsResponse: raise NotImplementedError() + async def openai_embeddings( + self, + model: str, + input: str | list[str], + encoding_format: str | None = "float", + dimensions: int | None = None, + user: str | None = None, + ) -> OpenAIEmbeddingsResponse: + raise NotImplementedError() + class TGIAdapter(_HfAdapter): async def initialize(self, config: TGIImplConfig) -> None: diff --git a/llama_stack/providers/remote/inference/together/config.py b/llama_stack/providers/remote/inference/together/config.py index fa7c45c9f..5c7f60519 100644 --- a/llama_stack/providers/remote/inference/together/config.py +++ b/llama_stack/providers/remote/inference/together/config.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict, Optional +from typing import Any from pydantic import BaseModel, Field, SecretStr @@ -17,13 +17,13 @@ class TogetherImplConfig(BaseModel): default="https://api.together.xyz/v1", description="The URL for the Together AI server", ) - api_key: Optional[SecretStr] = Field( + api_key: SecretStr | None = Field( default=None, description="The Together AI API Key", ) @classmethod - def sample_run_config(cls, **kwargs) -> Dict[str, Any]: + def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { "url": "https://api.together.xyz/v1", "api_key": "${env.TOGETHER_API_KEY:}", diff --git a/llama_stack/providers/remote/inference/together/together.py b/llama_stack/providers/remote/inference/together/together.py index 1615b8cd1..7305a638d 100644 --- a/llama_stack/providers/remote/inference/together/together.py +++ b/llama_stack/providers/remote/inference/together/together.py @@ -4,7 +4,8 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
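The TGI and Together configs above type their tokens as `SecretStr`, so the raw value is masked in reprs and logs and only surfaces through an explicit `get_secret_value()` call (the pattern the SambaNova adapter relies on earlier in this patch). A minimal sketch with an illustrative config class and a made-up key:

```python
from pydantic import BaseModel, SecretStr


class ExampleProviderConfig(BaseModel):
    # Mirrors the shape of the api_key/api_token fields above; the class itself is illustrative.
    api_key: SecretStr | None = None


cfg = ExampleProviderConfig(api_key="tok_example_12345")
print(cfg)                             # api_key=SecretStr('**********')  <- masked
print(cfg.api_key.get_secret_value())  # tok_example_12345
```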
-from typing import Any, AsyncGenerator, Dict, List, Optional, Union +from collections.abc import AsyncGenerator, AsyncIterator +from typing import Any from openai import AsyncOpenAI from together import AsyncTogether @@ -22,6 +23,7 @@ from llama_stack.apis.inference import ( Inference, LogProbConfig, Message, + OpenAIEmbeddingsResponse, ResponseFormat, ResponseFormatType, SamplingParams, @@ -31,7 +33,13 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.apis.inference.inference import OpenAIChatCompletion, OpenAICompletion, OpenAIMessageParam +from llama_stack.apis.inference.inference import ( + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, + OpenAIMessageParam, + OpenAIResponseFormatParam, +) from llama_stack.distribution.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper @@ -70,17 +78,20 @@ class TogetherInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProvi async def shutdown(self) -> None: if self._client: - await self._client.close() + # Together client has no close method, so just set to None self._client = None + if self._openai_client: + await self._openai_client.close() + self._openai_client = None async def completion( self, model_id: str, content: InterleavedContent, - sampling_params: Optional[SamplingParams] = None, - response_format: Optional[ResponseFormat] = None, - stream: Optional[bool] = False, - logprobs: Optional[LogProbConfig] = None, + sampling_params: SamplingParams | None = None, + response_format: ResponseFormat | None = None, + stream: bool | None = False, + logprobs: LogProbConfig | None = None, ) -> AsyncGenerator: if sampling_params is None: sampling_params = SamplingParams() @@ -138,8 +149,8 @@ class TogetherInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProvi def _build_options( self, - sampling_params: Optional[SamplingParams], - logprobs: Optional[LogProbConfig], + sampling_params: SamplingParams | None, + logprobs: LogProbConfig | None, fmt: ResponseFormat, ) -> dict: options = get_sampling_options(sampling_params) @@ -166,15 +177,15 @@ class TogetherInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProvi async def chat_completion( self, model_id: str, - messages: List[Message], - sampling_params: Optional[SamplingParams] = None, - tools: Optional[List[ToolDefinition]] = None, - tool_choice: Optional[ToolChoice] = ToolChoice.auto, - tool_prompt_format: Optional[ToolPromptFormat] = None, - response_format: Optional[ResponseFormat] = None, - stream: Optional[bool] = False, - logprobs: Optional[LogProbConfig] = None, - tool_config: Optional[ToolConfig] = None, + messages: list[Message], + sampling_params: SamplingParams | None = None, + tools: list[ToolDefinition] | None = None, + tool_choice: ToolChoice | None = ToolChoice.auto, + tool_prompt_format: ToolPromptFormat | None = None, + response_format: ResponseFormat | None = None, + stream: bool | None = False, + logprobs: LogProbConfig | None = None, + tool_config: ToolConfig | None = None, ) -> AsyncGenerator: if sampling_params is None: sampling_params = SamplingParams() @@ -215,7 +226,7 @@ class TogetherInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProvi async for chunk in process_chat_completion_stream_response(stream, request): yield chunk - async def _get_params(self, request: Union[ChatCompletionRequest, CompletionRequest]) -> dict: + async def 
_get_params(self, request: ChatCompletionRequest | CompletionRequest) -> dict: input_dict = {} media_present = request_has_media(request) llama_model = self.get_llama_model(request.model) @@ -240,10 +251,10 @@ class TogetherInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProvi async def embeddings( self, model_id: str, - contents: List[str] | List[InterleavedContentItem], - text_truncation: Optional[TextTruncation] = TextTruncation.none, - output_dimension: Optional[int] = None, - task_type: Optional[EmbeddingTaskType] = None, + contents: list[str] | list[InterleavedContentItem], + text_truncation: TextTruncation | None = TextTruncation.none, + output_dimension: int | None = None, + task_type: EmbeddingTaskType | None = None, ) -> EmbeddingsResponse: model = await self.model_store.get_model(model_id) assert all(not content_has_media(content) for content in contents), ( @@ -257,27 +268,37 @@ class TogetherInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProvi embeddings = [item.embedding for item in r.data] return EmbeddingsResponse(embeddings=embeddings) + async def openai_embeddings( + self, + model: str, + input: str | list[str], + encoding_format: str | None = "float", + dimensions: int | None = None, + user: str | None = None, + ) -> OpenAIEmbeddingsResponse: + raise NotImplementedError() + async def openai_completion( self, model: str, - prompt: Union[str, List[str], List[int], List[List[int]]], - best_of: Optional[int] = None, - echo: Optional[bool] = None, - frequency_penalty: Optional[float] = None, - logit_bias: Optional[Dict[str, float]] = None, - logprobs: Optional[bool] = None, - max_tokens: Optional[int] = None, - n: Optional[int] = None, - presence_penalty: Optional[float] = None, - seed: Optional[int] = None, - stop: Optional[Union[str, List[str]]] = None, - stream: Optional[bool] = None, - stream_options: Optional[Dict[str, Any]] = None, - temperature: Optional[float] = None, - top_p: Optional[float] = None, - user: Optional[str] = None, - guided_choice: Optional[List[str]] = None, - prompt_logprobs: Optional[int] = None, + prompt: str | list[str] | list[int] | list[list[int]], + best_of: int | None = None, + echo: bool | None = None, + frequency_penalty: float | None = None, + logit_bias: dict[str, float] | None = None, + logprobs: bool | None = None, + max_tokens: int | None = None, + n: int | None = None, + presence_penalty: float | None = None, + seed: int | None = None, + stop: str | list[str] | None = None, + stream: bool | None = None, + stream_options: dict[str, Any] | None = None, + temperature: float | None = None, + top_p: float | None = None, + user: str | None = None, + guided_choice: list[str] | None = None, + prompt_logprobs: int | None = None, ) -> OpenAICompletion: model_obj = await self.model_store.get_model(model) params = await prepare_openai_completion_params( @@ -304,29 +325,29 @@ class TogetherInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProvi async def openai_chat_completion( self, model: str, - messages: List[OpenAIMessageParam], - frequency_penalty: Optional[float] = None, - function_call: Optional[Union[str, Dict[str, Any]]] = None, - functions: Optional[List[Dict[str, Any]]] = None, - logit_bias: Optional[Dict[str, float]] = None, - logprobs: Optional[bool] = None, - max_completion_tokens: Optional[int] = None, - max_tokens: Optional[int] = None, - n: Optional[int] = None, - parallel_tool_calls: Optional[bool] = None, - presence_penalty: Optional[float] = None, - response_format: Optional[Dict[str, str]] = 
None, - seed: Optional[int] = None, - stop: Optional[Union[str, List[str]]] = None, - stream: Optional[bool] = None, - stream_options: Optional[Dict[str, Any]] = None, - temperature: Optional[float] = None, - tool_choice: Optional[Union[str, Dict[str, Any]]] = None, - tools: Optional[List[Dict[str, Any]]] = None, - top_logprobs: Optional[int] = None, - top_p: Optional[float] = None, - user: Optional[str] = None, - ) -> OpenAIChatCompletion: + messages: list[OpenAIMessageParam], + frequency_penalty: float | None = None, + function_call: str | dict[str, Any] | None = None, + functions: list[dict[str, Any]] | None = None, + logit_bias: dict[str, float] | None = None, + logprobs: bool | None = None, + max_completion_tokens: int | None = None, + max_tokens: int | None = None, + n: int | None = None, + parallel_tool_calls: bool | None = None, + presence_penalty: float | None = None, + response_format: OpenAIResponseFormatParam | None = None, + seed: int | None = None, + stop: str | list[str] | None = None, + stream: bool | None = None, + stream_options: dict[str, Any] | None = None, + temperature: float | None = None, + tool_choice: str | dict[str, Any] | None = None, + tools: list[dict[str, Any]] | None = None, + top_logprobs: int | None = None, + top_p: float | None = None, + user: str | None = None, + ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]: model_obj = await self.model_store.get_model(model) params = await prepare_openai_completion_params( model=model_obj.provider_resource_id, @@ -353,4 +374,26 @@ class TogetherInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProvi top_p=top_p, user=user, ) + if params.get("stream", False): + return self._stream_openai_chat_completion(params) return await self._get_openai_client().chat.completions.create(**params) # type: ignore + + async def _stream_openai_chat_completion(self, params: dict) -> AsyncGenerator: + # together.ai sometimes adds usage data to the stream, even if include_usage is False + # This causes an unexpected final chunk with empty choices array to be sent + # to clients that may not handle it gracefully. + include_usage = False + if params.get("stream_options", None): + include_usage = params["stream_options"].get("include_usage", False) + stream = await self._get_openai_client().chat.completions.create(**params) + + seen_finish_reason = False + async for chunk in stream: + # Final usage chunk with no choices that the user didn't request, so discard + if not include_usage and seen_finish_reason and len(chunk.choices) == 0: + break + yield chunk + for choice in chunk.choices: + if choice.finish_reason: + seen_finish_reason = True + break diff --git a/llama_stack/providers/remote/inference/together_openai_compat/__init__.py b/llama_stack/providers/remote/inference/together_openai_compat/__init__.py index 6fdf05b7e..8213fc5f4 100644 --- a/llama_stack/providers/remote/inference/together_openai_compat/__init__.py +++ b/llama_stack/providers/remote/inference/together_openai_compat/__init__.py @@ -4,12 +4,12 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
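The `_stream_openai_chat_completion` helper above works around providers that append an unsolicited usage-only chunk after the final choice. A standalone sketch of that filtering pattern, with chunks faked via `SimpleNamespace` for illustration:

```python
import asyncio
from types import SimpleNamespace


async def fake_upstream():
    # Two normal chunks, then an unsolicited usage chunk with an empty choices list.
    yield SimpleNamespace(choices=[SimpleNamespace(delta="Hello", finish_reason=None)])
    yield SimpleNamespace(choices=[SimpleNamespace(delta="", finish_reason="stop")])
    yield SimpleNamespace(choices=[], usage={"total_tokens": 5})


async def filter_stream(stream, include_usage: bool = False):
    seen_finish_reason = False
    async for chunk in stream:
        # Drop a trailing empty-choices chunk unless the caller asked for usage.
        if not include_usage and seen_finish_reason and len(chunk.choices) == 0:
            break
        yield chunk
        for choice in chunk.choices:
            if choice.finish_reason:
                seen_finish_reason = True
                break


async def main():
    async for chunk in filter_stream(fake_upstream()):
        print(chunk)


asyncio.run(main())
```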
-from llama_stack.apis.inference import Inference +from llama_stack.apis.inference import InferenceProvider from .config import TogetherCompatConfig -async def get_adapter_impl(config: TogetherCompatConfig, _deps) -> Inference: +async def get_adapter_impl(config: TogetherCompatConfig, _deps) -> InferenceProvider: # import dynamically so the import is used only when it is needed from .together import TogetherCompatInferenceAdapter diff --git a/llama_stack/providers/remote/inference/together_openai_compat/config.py b/llama_stack/providers/remote/inference/together_openai_compat/config.py index 120adbed9..0c6d4f748 100644 --- a/llama_stack/providers/remote/inference/together_openai_compat/config.py +++ b/llama_stack/providers/remote/inference/together_openai_compat/config.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict, Optional +from typing import Any from pydantic import BaseModel, Field @@ -12,7 +12,7 @@ from llama_stack.schema_utils import json_schema_type class TogetherProviderDataValidator(BaseModel): - together_api_key: Optional[str] = Field( + together_api_key: str | None = Field( default=None, description="API key for Together models", ) @@ -20,7 +20,7 @@ class TogetherProviderDataValidator(BaseModel): @json_schema_type class TogetherCompatConfig(BaseModel): - api_key: Optional[str] = Field( + api_key: str | None = Field( default=None, description="The Together API key", ) @@ -31,7 +31,7 @@ class TogetherCompatConfig(BaseModel): ) @classmethod - def sample_run_config(cls, api_key: str = "${env.TOGETHER_API_KEY}", **kwargs) -> Dict[str, Any]: + def sample_run_config(cls, api_key: str = "${env.TOGETHER_API_KEY}", **kwargs) -> dict[str, Any]: return { "openai_compat_api_base": "https://api.together.xyz/v1", "api_key": api_key, diff --git a/llama_stack/providers/remote/inference/vllm/config.py b/llama_stack/providers/remote/inference/vllm/config.py index 762cffde3..99abddf51 100644 --- a/llama_stack/providers/remote/inference/vllm/config.py +++ b/llama_stack/providers/remote/inference/vllm/config.py @@ -4,16 +4,16 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Optional +from pathlib import Path -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, field_validator from llama_stack.schema_utils import json_schema_type @json_schema_type class VLLMInferenceAdapterConfig(BaseModel): - url: Optional[str] = Field( + url: str | None = Field( default=None, description="The URL for the vLLM model serving endpoint", ) @@ -21,15 +21,31 @@ class VLLMInferenceAdapterConfig(BaseModel): default=4096, description="Maximum number of tokens to generate.", ) - api_token: Optional[str] = Field( + api_token: str | None = Field( default="fake", description="The API token", ) - tls_verify: bool = Field( + tls_verify: bool | str = Field( default=True, - description="Whether to verify TLS certificates", + description="Whether to verify TLS certificates. 
Can be a boolean or a path to a CA certificate file.", ) + @field_validator("tls_verify") + @classmethod + def validate_tls_verify(cls, v): + if isinstance(v, str): + # Check if it's a boolean string + if v.lower() in ("true", "false"): + return v.lower() == "true" + # Otherwise, treat it as a cert path + cert_path = Path(v).expanduser().resolve() + if not cert_path.exists(): + raise ValueError(f"TLS certificate file does not exist: {v}") + if not cert_path.is_file(): + raise ValueError(f"TLS certificate path is not a file: {v}") + return v + return v + @classmethod def sample_run_config( cls, diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py index 0044d2e75..9f38d9abf 100644 --- a/llama_stack/providers/remote/inference/vllm/vllm.py +++ b/llama_stack/providers/remote/inference/vllm/vllm.py @@ -5,7 +5,8 @@ # the root directory of this source tree. import json import logging -from typing import Any, AsyncGenerator, Dict, List, Optional, Union +from collections.abc import AsyncGenerator, AsyncIterator +from typing import Any import httpx from openai import AsyncOpenAI @@ -37,6 +38,7 @@ from llama_stack.apis.inference import ( JsonSchemaResponseFormat, LogProbConfig, Message, + OpenAIEmbeddingsResponse, ResponseFormat, SamplingParams, TextTruncation, @@ -45,7 +47,12 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.apis.inference.inference import OpenAIChatCompletion, OpenAICompletion, OpenAIMessageParam +from llama_stack.apis.inference.inference import ( + OpenAIChatCompletion, + OpenAICompletion, + OpenAIMessageParam, + OpenAIResponseFormatParam, +) from llama_stack.apis.models import Model, ModelType from llama_stack.models.llama.datatypes import BuiltinTool, StopReason, ToolCall from llama_stack.models.llama.sku_list import all_registered_models @@ -89,7 +96,7 @@ def build_hf_repo_model_entries(): def _convert_to_vllm_tool_calls_in_response( tool_calls, -) -> List[ToolCall]: +) -> list[ToolCall]: if not tool_calls: return [] @@ -104,7 +111,7 @@ def _convert_to_vllm_tool_calls_in_response( ] -def _convert_to_vllm_tools_in_request(tools: List[ToolDefinition]) -> List[dict]: +def _convert_to_vllm_tools_in_request(tools: list[ToolDefinition]) -> list[dict]: compat_tools = [] for tool in tools: @@ -152,27 +159,28 @@ def _convert_to_vllm_finish_reason(finish_reason: str) -> StopReason: }.get(finish_reason, StopReason.end_of_turn) -async def _process_vllm_chat_completion_stream_response( - stream: AsyncGenerator[OpenAIChatCompletionChunk, None], -) -> AsyncGenerator: - event_type = ChatCompletionResponseEventType.start - tool_call_buf = UnparseableToolCall() - async for chunk in stream: - if not chunk.choices: - log.warning("vLLM failed to generation any completions - check the vLLM server logs for an error.") - continue - choice = chunk.choices[0] - if choice.finish_reason: - args_str = tool_call_buf.arguments - args = None - try: - args = {} if not args_str else json.loads(args_str) - except Exception as e: - log.warning(f"Failed to parse tool call buffer arguments: {args_str} \nError: {e}") - if args: - yield ChatCompletionResponseStreamChunk( +def _process_vllm_chat_completion_end_of_stream( + finish_reason: str | None, + last_chunk_content: str | None, + current_event_type: ChatCompletionResponseEventType, + tool_call_bufs: dict[str, UnparseableToolCall] | None = None, +) -> list[OpenAIChatCompletionChunk]: + chunks = [] + + if finish_reason is not None: + stop_reason = 
_convert_to_vllm_finish_reason(finish_reason) + else: + stop_reason = StopReason.end_of_message + + tool_call_bufs = tool_call_bufs or {} + for _index, tool_call_buf in sorted(tool_call_bufs.items()): + args_str = tool_call_buf.arguments or "{}" + try: + args = json.loads(args_str) + chunks.append( + ChatCompletionResponseStreamChunk( event=ChatCompletionResponseEvent( - event_type=event_type, + event_type=current_event_type, delta=ToolCallDelta( tool_call=ToolCall( call_id=tool_call_buf.call_id, @@ -184,8 +192,12 @@ async def _process_vllm_chat_completion_stream_response( ), ) ) - elif args_str: - yield ChatCompletionResponseStreamChunk( + ) + except Exception as e: + log.warning(f"Failed to parse tool call buffer arguments: {args_str} \nError: {e}") + + chunks.append( + ChatCompletionResponseStreamChunk( event=ChatCompletionResponseEvent( event_type=ChatCompletionResponseEventType.progress, delta=ToolCallDelta( @@ -194,21 +206,62 @@ async def _process_vllm_chat_completion_stream_response( ), ) ) - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.complete, - delta=TextDelta(text=choice.delta.content or ""), - logprobs=None, - stop_reason=_convert_to_vllm_finish_reason(choice.finish_reason), - ) ) - elif choice.delta.tool_calls: - tool_call = convert_tool_call(choice.delta.tool_calls[0]) - tool_call_buf.tool_name += str(tool_call.tool_name) - tool_call_buf.call_id += tool_call.call_id - # TODO: remove str() when dict type for 'arguments' is no longer allowed - tool_call_buf.arguments += str(tool_call.arguments) - else: + + chunks.append( + ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=ChatCompletionResponseEventType.complete, + delta=TextDelta(text=last_chunk_content or ""), + logprobs=None, + stop_reason=stop_reason, + ) + ) + ) + + return chunks + + +async def _process_vllm_chat_completion_stream_response( + stream: AsyncGenerator[OpenAIChatCompletionChunk, None], +) -> AsyncGenerator: + yield ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=ChatCompletionResponseEventType.start, + delta=TextDelta(text=""), + ) + ) + event_type = ChatCompletionResponseEventType.progress + tool_call_bufs: dict[str, UnparseableToolCall] = {} + end_of_stream_processed = False + + async for chunk in stream: + if not chunk.choices: + log.warning("vLLM failed to generation any completions - check the vLLM server logs for an error.") + return + choice = chunk.choices[0] + if choice.delta.tool_calls: + for delta_tool_call in choice.delta.tool_calls: + tool_call = convert_tool_call(delta_tool_call) + if delta_tool_call.index not in tool_call_bufs: + tool_call_bufs[delta_tool_call.index] = UnparseableToolCall() + tool_call_buf = tool_call_bufs[delta_tool_call.index] + tool_call_buf.tool_name += str(tool_call.tool_name) + tool_call_buf.call_id += tool_call.call_id + tool_call_buf.arguments += ( + tool_call.arguments if isinstance(tool_call.arguments, str) else json.dumps(tool_call.arguments) + ) + if choice.finish_reason: + chunks = _process_vllm_chat_completion_end_of_stream( + finish_reason=choice.finish_reason, + last_chunk_content=choice.delta.content, + current_event_type=event_type, + tool_call_bufs=tool_call_bufs, + ) + for c in chunks: + yield c + end_of_stream_processed = True + elif not choice.delta.tool_calls: yield ChatCompletionResponseStreamChunk( event=ChatCompletionResponseEvent( event_type=event_type, @@ -218,6 +271,17 @@ async def 
_process_vllm_chat_completion_stream_response( ) event_type = ChatCompletionResponseEventType.progress + if end_of_stream_processed: + return + + # the stream ended without a chunk containing finish_reason - we have to generate the + # respective completion chunks manually + chunks = _process_vllm_chat_completion_end_of_stream( + finish_reason=None, last_chunk_content=None, current_event_type=event_type, tool_call_bufs=tool_call_bufs + ) + for c in chunks: + yield c + class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate): def __init__(self, config: VLLMInferenceAdapterConfig) -> None: @@ -226,12 +290,7 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate): self.client = None async def initialize(self) -> None: - log.info(f"Initializing VLLM client with base_url={self.config.url}") - self.client = AsyncOpenAI( - base_url=self.config.url, - api_key=self.config.api_token, - http_client=None if self.config.tls_verify else httpx.AsyncClient(verify=False), - ) + pass async def shutdown(self) -> None: pass @@ -244,18 +303,35 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate): raise ValueError("Model store not set") return await self.model_store.get_model(model_id) + def _lazy_initialize_client(self): + if self.client is not None: + return + + log.info(f"Initializing vLLM client with base_url={self.config.url}") + self.client = self._create_client() + + def _create_client(self): + return AsyncOpenAI( + base_url=self.config.url, + api_key=self.config.api_token, + http_client=httpx.AsyncClient(verify=self.config.tls_verify), + ) + async def completion( self, model_id: str, content: InterleavedContent, - sampling_params: Optional[SamplingParams] = None, - response_format: Optional[ResponseFormat] = None, - stream: Optional[bool] = False, - logprobs: Optional[LogProbConfig] = None, + sampling_params: SamplingParams | None = None, + response_format: ResponseFormat | None = None, + stream: bool | None = False, + logprobs: LogProbConfig | None = None, ) -> CompletionResponse | AsyncGenerator[CompletionResponseStreamChunk, None]: + self._lazy_initialize_client() if sampling_params is None: sampling_params = SamplingParams() model = await self._get_model(model_id) + if model.provider_resource_id is None: + raise ValueError(f"Model {model_id} has no provider_resource_id set") request = CompletionRequest( model=model.provider_resource_id, content=content, @@ -272,19 +348,22 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate): async def chat_completion( self, model_id: str, - messages: List[Message], - sampling_params: Optional[SamplingParams] = None, - tools: Optional[List[ToolDefinition]] = None, - tool_choice: Optional[ToolChoice] = ToolChoice.auto, - tool_prompt_format: Optional[ToolPromptFormat] = None, - response_format: Optional[ResponseFormat] = None, - stream: Optional[bool] = False, - logprobs: Optional[LogProbConfig] = None, - tool_config: Optional[ToolConfig] = None, + messages: list[Message], + sampling_params: SamplingParams | None = None, + tools: list[ToolDefinition] | None = None, + tool_choice: ToolChoice | None = ToolChoice.auto, + tool_prompt_format: ToolPromptFormat | None = None, + response_format: ResponseFormat | None = None, + stream: bool | None = False, + logprobs: LogProbConfig | None = None, + tool_config: ToolConfig | None = None, ) -> ChatCompletionResponse | AsyncGenerator[ChatCompletionResponseStreamChunk, None]: + self._lazy_initialize_client() if sampling_params is None: sampling_params = SamplingParams() model = await 
self._get_model(model_id) + if model.provider_resource_id is None: + raise ValueError(f"Model {model_id} has no provider_resource_id set") # This is to be consistent with OpenAI API and support vLLM <= v0.6.3 # References: # * https://platform.openai.com/docs/api-reference/chat/create#chat-create-tool_choice @@ -352,9 +431,15 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate): yield chunk async def register_model(self, model: Model) -> Model: - assert self.client is not None - model = await self.register_helper.register_model(model) - res = await self.client.models.list() + # register_model is called during Llama Stack initialization, hence we cannot init self.client if not initialized yet. + # self.client should only be created after the initialization is complete to avoid asyncio cross-context errors. + # Changing this may lead to unpredictable behavior. + client = self._create_client() if self.client is None else self.client + try: + model = await self.register_helper.register_model(model) + except ValueError: + pass # Ignore statically unknown model, will check live listing + res = await client.models.list() available_models = [m.id async for m in res] if model.provider_resource_id not in available_models: raise ValueError( @@ -363,13 +448,14 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate): ) return model - async def _get_params(self, request: Union[ChatCompletionRequest, CompletionRequest]) -> dict: + async def _get_params(self, request: ChatCompletionRequest | CompletionRequest) -> dict: options = get_sampling_options(request.sampling_params) if "max_tokens" not in options: options["max_tokens"] = self.config.max_tokens input_dict: dict[str, Any] = {} - if isinstance(request, ChatCompletionRequest) and request.tools is not None: + # Only include the 'tools' param if there is any. It can break things if an empty list is sent to the vLLM. 
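As a minimal sketch (not part of this patch; the helper name is hypothetical) of the guard described in the comment above, the idea is simply to omit the `tools` key whenever the request carries no tool definitions, since per that comment an empty list can break the vLLM request:

```python
# Hypothetical helper mirroring the guard above: attach "tools" only when the
# request actually carries tool definitions; None and [] are both treated as
# "no tools" so nothing is sent to vLLM in that case.
def build_tool_kwargs(tools: list[dict] | None) -> dict:
    kwargs: dict = {}
    if tools:
        kwargs["tools"] = tools
    return kwargs


assert build_tool_kwargs(None) == {}
assert build_tool_kwargs([]) == {}
assert build_tool_kwargs([{"type": "function"}]) == {"tools": [{"type": "function"}]}
```

The patch achieves the same effect inline by testing `request.tools` for truthiness instead of `request.tools is not None`.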
+ if isinstance(request, ChatCompletionRequest) and request.tools: input_dict = {"tools": _convert_to_vllm_tools_in_request(request.tools)} if isinstance(request, ChatCompletionRequest): @@ -399,11 +485,12 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate): async def embeddings( self, model_id: str, - contents: List[str] | List[InterleavedContentItem], - text_truncation: Optional[TextTruncation] = TextTruncation.none, - output_dimension: Optional[int] = None, - task_type: Optional[EmbeddingTaskType] = None, + contents: list[str] | list[InterleavedContentItem], + text_truncation: TextTruncation | None = TextTruncation.none, + output_dimension: int | None = None, + task_type: EmbeddingTaskType | None = None, ) -> EmbeddingsResponse: + self._lazy_initialize_client() assert self.client is not None model = await self._get_model(model_id) @@ -421,31 +508,42 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate): embeddings = [data.embedding for data in response.data] return EmbeddingsResponse(embeddings=embeddings) + async def openai_embeddings( + self, + model: str, + input: str | list[str], + encoding_format: str | None = "float", + dimensions: int | None = None, + user: str | None = None, + ) -> OpenAIEmbeddingsResponse: + raise NotImplementedError() + async def openai_completion( self, model: str, - prompt: Union[str, List[str], List[int], List[List[int]]], - best_of: Optional[int] = None, - echo: Optional[bool] = None, - frequency_penalty: Optional[float] = None, - logit_bias: Optional[Dict[str, float]] = None, - logprobs: Optional[bool] = None, - max_tokens: Optional[int] = None, - n: Optional[int] = None, - presence_penalty: Optional[float] = None, - seed: Optional[int] = None, - stop: Optional[Union[str, List[str]]] = None, - stream: Optional[bool] = None, - stream_options: Optional[Dict[str, Any]] = None, - temperature: Optional[float] = None, - top_p: Optional[float] = None, - user: Optional[str] = None, - guided_choice: Optional[List[str]] = None, - prompt_logprobs: Optional[int] = None, + prompt: str | list[str] | list[int] | list[list[int]], + best_of: int | None = None, + echo: bool | None = None, + frequency_penalty: float | None = None, + logit_bias: dict[str, float] | None = None, + logprobs: bool | None = None, + max_tokens: int | None = None, + n: int | None = None, + presence_penalty: float | None = None, + seed: int | None = None, + stop: str | list[str] | None = None, + stream: bool | None = None, + stream_options: dict[str, Any] | None = None, + temperature: float | None = None, + top_p: float | None = None, + user: str | None = None, + guided_choice: list[str] | None = None, + prompt_logprobs: int | None = None, ) -> OpenAICompletion: + self._lazy_initialize_client() model_obj = await self._get_model(model) - extra_body: Dict[str, Any] = {} + extra_body: dict[str, Any] = {} if prompt_logprobs is not None and prompt_logprobs >= 0: extra_body["prompt_logprobs"] = prompt_logprobs if guided_choice: @@ -476,29 +574,30 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate): async def openai_chat_completion( self, model: str, - messages: List[OpenAIMessageParam], - frequency_penalty: Optional[float] = None, - function_call: Optional[Union[str, Dict[str, Any]]] = None, - functions: Optional[List[Dict[str, Any]]] = None, - logit_bias: Optional[Dict[str, float]] = None, - logprobs: Optional[bool] = None, - max_completion_tokens: Optional[int] = None, - max_tokens: Optional[int] = None, - n: Optional[int] = None, - parallel_tool_calls: Optional[bool] = 
None, - presence_penalty: Optional[float] = None, - response_format: Optional[Dict[str, str]] = None, - seed: Optional[int] = None, - stop: Optional[Union[str, List[str]]] = None, - stream: Optional[bool] = None, - stream_options: Optional[Dict[str, Any]] = None, - temperature: Optional[float] = None, - tool_choice: Optional[Union[str, Dict[str, Any]]] = None, - tools: Optional[List[Dict[str, Any]]] = None, - top_logprobs: Optional[int] = None, - top_p: Optional[float] = None, - user: Optional[str] = None, - ) -> OpenAIChatCompletion: + messages: list[OpenAIMessageParam], + frequency_penalty: float | None = None, + function_call: str | dict[str, Any] | None = None, + functions: list[dict[str, Any]] | None = None, + logit_bias: dict[str, float] | None = None, + logprobs: bool | None = None, + max_completion_tokens: int | None = None, + max_tokens: int | None = None, + n: int | None = None, + parallel_tool_calls: bool | None = None, + presence_penalty: float | None = None, + response_format: OpenAIResponseFormatParam | None = None, + seed: int | None = None, + stop: str | list[str] | None = None, + stream: bool | None = None, + stream_options: dict[str, Any] | None = None, + temperature: float | None = None, + tool_choice: str | dict[str, Any] | None = None, + tools: list[dict[str, Any]] | None = None, + top_logprobs: int | None = None, + top_p: float | None = None, + user: str | None = None, + ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]: + self._lazy_initialize_client() model_obj = await self._get_model(model) params = await prepare_openai_completion_params( model=model_obj.provider_resource_id, @@ -530,21 +629,21 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate): async def batch_completion( self, model_id: str, - content_batch: List[InterleavedContent], - sampling_params: Optional[SamplingParams] = None, - response_format: Optional[ResponseFormat] = None, - logprobs: Optional[LogProbConfig] = None, + content_batch: list[InterleavedContent], + sampling_params: SamplingParams | None = None, + response_format: ResponseFormat | None = None, + logprobs: LogProbConfig | None = None, ): raise NotImplementedError("Batch completion is not supported for Ollama") async def batch_chat_completion( self, model_id: str, - messages_batch: List[List[Message]], - sampling_params: Optional[SamplingParams] = None, - tools: Optional[List[ToolDefinition]] = None, - tool_config: Optional[ToolConfig] = None, - response_format: Optional[ResponseFormat] = None, - logprobs: Optional[LogProbConfig] = None, + messages_batch: list[list[Message]], + sampling_params: SamplingParams | None = None, + tools: list[ToolDefinition] | None = None, + tool_config: ToolConfig | None = None, + response_format: ResponseFormat | None = None, + logprobs: LogProbConfig | None = None, ): raise NotImplementedError("Batch chat completion is not supported for Ollama") diff --git a/llama_stack/providers/remote/inference/watsonx/__init__.py b/llama_stack/providers/remote/inference/watsonx/__init__.py new file mode 100644 index 000000000..e59e873b6 --- /dev/null +++ b/llama_stack/providers/remote/inference/watsonx/__init__.py @@ -0,0 +1,22 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from llama_stack.apis.inference import Inference + +from .config import WatsonXConfig + + +async def get_adapter_impl(config: WatsonXConfig, _deps) -> Inference: + # import dynamically so `llama stack build` does not fail due to missing dependencies + from .watsonx import WatsonXInferenceAdapter + + if not isinstance(config, WatsonXConfig): + raise RuntimeError(f"Unexpected config type: {type(config)}") + adapter = WatsonXInferenceAdapter(config) + return adapter + + +__all__ = ["get_adapter_impl", "WatsonXConfig"] diff --git a/llama_stack/providers/remote/inference/watsonx/config.py b/llama_stack/providers/remote/inference/watsonx/config.py new file mode 100644 index 000000000..5eda9c5c0 --- /dev/null +++ b/llama_stack/providers/remote/inference/watsonx/config.py @@ -0,0 +1,46 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import os +from typing import Any + +from pydantic import BaseModel, Field, SecretStr + +from llama_stack.schema_utils import json_schema_type + + +class WatsonXProviderDataValidator(BaseModel): + url: str + api_key: str + project_id: str + + +@json_schema_type +class WatsonXConfig(BaseModel): + url: str = Field( + default_factory=lambda: os.getenv("WATSONX_BASE_URL", "https://us-south.ml.cloud.ibm.com"), + description="The base URL for accessing watsonx.ai", + ) + api_key: SecretStr | None = Field( + default_factory=lambda: os.getenv("WATSONX_API_KEY"), + description="The watsonx API key, only needed if using the hosted service", + ) + project_id: str | None = Field( + default_factory=lambda: os.getenv("WATSONX_PROJECT_ID"), + description="The watsonx project ID, only needed if using the hosted service", + ) + timeout: int = Field( + default=60, + description="Timeout for the HTTP requests", + ) + + @classmethod + def sample_run_config(cls, **kwargs) -> dict[str, Any]: + return { + "url": "${env.WATSONX_BASE_URL:https://us-south.ml.cloud.ibm.com}", + "api_key": "${env.WATSONX_API_KEY:}", + "project_id": "${env.WATSONX_PROJECT_ID:}", + } diff --git a/llama_stack/providers/remote/inference/watsonx/models.py b/llama_stack/providers/remote/inference/watsonx/models.py new file mode 100644 index 000000000..d98f0510a --- /dev/null +++ b/llama_stack/providers/remote/inference/watsonx/models.py @@ -0,0 +1,47 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree.
+ +from llama_stack.models.llama.sku_types import CoreModelId +from llama_stack.providers.utils.inference.model_registry import build_hf_repo_model_entry + +MODEL_ENTRIES = [ + build_hf_repo_model_entry( + "meta-llama/llama-3-3-70b-instruct", + CoreModelId.llama3_3_70b_instruct.value, + ), + build_hf_repo_model_entry( + "meta-llama/llama-2-13b-chat", + CoreModelId.llama2_13b.value, + ), + build_hf_repo_model_entry( + "meta-llama/llama-3-1-70b-instruct", + CoreModelId.llama3_1_70b_instruct.value, + ), + build_hf_repo_model_entry( + "meta-llama/llama-3-1-8b-instruct", + CoreModelId.llama3_1_8b_instruct.value, + ), + build_hf_repo_model_entry( + "meta-llama/llama-3-2-11b-vision-instruct", + CoreModelId.llama3_2_11b_vision_instruct.value, + ), + build_hf_repo_model_entry( + "meta-llama/llama-3-2-1b-instruct", + CoreModelId.llama3_2_1b_instruct.value, + ), + build_hf_repo_model_entry( + "meta-llama/llama-3-2-3b-instruct", + CoreModelId.llama3_2_3b_instruct.value, + ), + build_hf_repo_model_entry( + "meta-llama/llama-3-2-90b-vision-instruct", + CoreModelId.llama3_2_90b_vision_instruct.value, + ), + build_hf_repo_model_entry( + "meta-llama/llama-guard-3-11b-vision", + CoreModelId.llama_guard_3_11b_vision.value, + ), +] diff --git a/llama_stack/providers/remote/inference/watsonx/watsonx.py b/llama_stack/providers/remote/inference/watsonx/watsonx.py new file mode 100644 index 000000000..59f5f5562 --- /dev/null +++ b/llama_stack/providers/remote/inference/watsonx/watsonx.py @@ -0,0 +1,390 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from collections.abc import AsyncGenerator, AsyncIterator +from typing import Any + +from ibm_watson_machine_learning.foundation_models import Model +from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams +from openai import AsyncOpenAI + +from llama_stack.apis.common.content_types import InterleavedContent, InterleavedContentItem +from llama_stack.apis.inference import ( + ChatCompletionRequest, + ChatCompletionResponse, + CompletionRequest, + EmbeddingsResponse, + EmbeddingTaskType, + Inference, + LogProbConfig, + Message, + OpenAIEmbeddingsResponse, + ResponseFormat, + SamplingParams, + TextTruncation, + ToolChoice, + ToolConfig, + ToolDefinition, + ToolPromptFormat, +) +from llama_stack.apis.inference.inference import ( + GreedySamplingStrategy, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, + OpenAIMessageParam, + OpenAIResponseFormatParam, + TopKSamplingStrategy, + TopPSamplingStrategy, +) +from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper +from llama_stack.providers.utils.inference.openai_compat import ( + OpenAICompatCompletionChoice, + OpenAICompatCompletionResponse, + prepare_openai_completion_params, + process_chat_completion_response, + process_chat_completion_stream_response, + process_completion_response, + process_completion_stream_response, +) +from llama_stack.providers.utils.inference.prompt_adapter import ( + chat_completion_request_to_prompt, + completion_request_to_prompt, + request_has_media, +) + +from . 
import WatsonXConfig +from .models import MODEL_ENTRIES + + +class WatsonXInferenceAdapter(Inference, ModelRegistryHelper): + def __init__(self, config: WatsonXConfig) -> None: + ModelRegistryHelper.__init__(self, MODEL_ENTRIES) + + print(f"Initializing watsonx InferenceAdapter({config.url})...") + + self._config = config + self._openai_client = None + self._project_id = self._config.project_id + + async def initialize(self) -> None: + pass + + async def shutdown(self) -> None: + pass + + async def completion( + self, + model_id: str, + content: InterleavedContent, + sampling_params: SamplingParams | None = None, + response_format: ResponseFormat | None = None, + stream: bool | None = False, + logprobs: LogProbConfig | None = None, + ) -> AsyncGenerator: + if sampling_params is None: + sampling_params = SamplingParams() + model = await self.model_store.get_model(model_id) + request = CompletionRequest( + model=model.provider_resource_id, + content=content, + sampling_params=sampling_params, + response_format=response_format, + stream=stream, + logprobs=logprobs, + ) + if stream: + return self._stream_completion(request) + else: + return await self._nonstream_completion(request) + + def _get_client(self, model_id) -> Model: + config_api_key = self._config.api_key.get_secret_value() if self._config.api_key else None + config_url = self._config.url + project_id = self._config.project_id + credentials = {"url": config_url, "apikey": config_api_key} + + return Model(model_id=model_id, credentials=credentials, project_id=project_id) + + def _get_openai_client(self) -> AsyncOpenAI: + if not self._openai_client: + self._openai_client = AsyncOpenAI( + base_url=f"{self._config.url}/openai/v1", + api_key=self._config.api_key.get_secret_value() if self._config.api_key else None, + ) + return self._openai_client + + async def _nonstream_completion(self, request: CompletionRequest) -> ChatCompletionResponse: + params = await self._get_params(request) + r = self._get_client(request.model).generate(**params) + choices = [] + if "results" in r: + for result in r["results"]: + choice = OpenAICompatCompletionChoice( + finish_reason=result["stop_reason"] if result["stop_reason"] else None, + text=result["generated_text"], + ) + choices.append(choice) + response = OpenAICompatCompletionResponse( + choices=choices, + ) + return process_completion_response(response) + + async def _stream_completion(self, request: CompletionRequest) -> AsyncGenerator: + params = await self._get_params(request) + + async def _generate_and_convert_to_openai_compat(): + s = self._get_client(request.model).generate_text_stream(**params) + for chunk in s: + choice = OpenAICompatCompletionChoice( + finish_reason=None, + text=chunk, + ) + yield OpenAICompatCompletionResponse( + choices=[choice], + ) + + stream = _generate_and_convert_to_openai_compat() + async for chunk in process_completion_stream_response(stream): + yield chunk + + async def chat_completion( + self, + model_id: str, + messages: list[Message], + sampling_params: SamplingParams | None = None, + tools: list[ToolDefinition] | None = None, + tool_choice: ToolChoice | None = ToolChoice.auto, + tool_prompt_format: ToolPromptFormat | None = None, + response_format: ResponseFormat | None = None, + stream: bool | None = False, + logprobs: LogProbConfig | None = None, + tool_config: ToolConfig | None = None, + ) -> AsyncGenerator: + if sampling_params is None: + sampling_params = SamplingParams() + model = await self.model_store.get_model(model_id) + request = ChatCompletionRequest( + model=model.provider_resource_id, + messages=messages, +
sampling_params=sampling_params, + tools=tools or [], + response_format=response_format, + stream=stream, + logprobs=logprobs, + tool_config=tool_config, + ) + + if stream: + return self._stream_chat_completion(request) + else: + return await self._nonstream_chat_completion(request) + + async def _nonstream_chat_completion(self, request: ChatCompletionRequest) -> ChatCompletionResponse: + params = await self._get_params(request) + r = self._get_client(request.model).generate(**params) + choices = [] + if "results" in r: + for result in r["results"]: + choice = OpenAICompatCompletionChoice( + finish_reason=result["stop_reason"] if result["stop_reason"] else None, + text=result["generated_text"], + ) + choices.append(choice) + response = OpenAICompatCompletionResponse( + choices=choices, + ) + return process_chat_completion_response(response, request) + + async def _stream_chat_completion(self, request: ChatCompletionRequest) -> AsyncGenerator: + params = await self._get_params(request) + model_id = request.model + + # if we shift to TogetherAsyncClient, we won't need this wrapper + async def _to_async_generator(): + s = self._get_client(model_id).generate_text_stream(**params) + for chunk in s: + choice = OpenAICompatCompletionChoice( + finish_reason=None, + text=chunk, + ) + yield OpenAICompatCompletionResponse( + choices=[choice], + ) + + stream = _to_async_generator() + async for chunk in process_chat_completion_stream_response(stream, request): + yield chunk + + async def _get_params(self, request: ChatCompletionRequest | CompletionRequest) -> dict: + input_dict = {"params": {}} + media_present = request_has_media(request) + llama_model = self.get_llama_model(request.model) + if isinstance(request, ChatCompletionRequest): + input_dict["prompt"] = await chat_completion_request_to_prompt(request, llama_model) + else: + assert not media_present, "Together does not support media for Completion requests" + input_dict["prompt"] = await completion_request_to_prompt(request) + if request.sampling_params: + if request.sampling_params.strategy: + input_dict["params"][GenParams.DECODING_METHOD] = request.sampling_params.strategy.type + if request.sampling_params.max_tokens: + input_dict["params"][GenParams.MAX_NEW_TOKENS] = request.sampling_params.max_tokens + if request.sampling_params.repetition_penalty: + input_dict["params"][GenParams.REPETITION_PENALTY] = request.sampling_params.repetition_penalty + + if isinstance(request.sampling_params.strategy, TopPSamplingStrategy): + input_dict["params"][GenParams.TOP_P] = request.sampling_params.strategy.top_p + input_dict["params"][GenParams.TEMPERATURE] = request.sampling_params.strategy.temperature + if isinstance(request.sampling_params.strategy, TopKSamplingStrategy): + input_dict["params"][GenParams.TOP_K] = request.sampling_params.strategy.top_k + if isinstance(request.sampling_params.strategy, GreedySamplingStrategy): + input_dict["params"][GenParams.TEMPERATURE] = 0.0 + + input_dict["params"][GenParams.STOP_SEQUENCES] = ["<|endoftext|>"] + + params = { + **input_dict, + } + return params + + async def embeddings( + self, + model_id: str, + contents: list[str] | list[InterleavedContentItem], + text_truncation: TextTruncation | None = TextTruncation.none, + output_dimension: int | None = None, + task_type: EmbeddingTaskType | None = None, + ) -> EmbeddingsResponse: + raise NotImplementedError("embedding is not supported for watsonx") + + async def openai_embeddings( + self, + model: str, + input: str | list[str], + encoding_format: str | None = 
"float", + dimensions: int | None = None, + user: str | None = None, + ) -> OpenAIEmbeddingsResponse: + raise NotImplementedError() + + async def openai_completion( + self, + model: str, + prompt: str | list[str] | list[int] | list[list[int]], + best_of: int | None = None, + echo: bool | None = None, + frequency_penalty: float | None = None, + logit_bias: dict[str, float] | None = None, + logprobs: bool | None = None, + max_tokens: int | None = None, + n: int | None = None, + presence_penalty: float | None = None, + seed: int | None = None, + stop: str | list[str] | None = None, + stream: bool | None = None, + stream_options: dict[str, Any] | None = None, + temperature: float | None = None, + top_p: float | None = None, + user: str | None = None, + guided_choice: list[str] | None = None, + prompt_logprobs: int | None = None, + ) -> OpenAICompletion: + model_obj = await self.model_store.get_model(model) + params = await prepare_openai_completion_params( + model=model_obj.provider_resource_id, + prompt=prompt, + best_of=best_of, + echo=echo, + frequency_penalty=frequency_penalty, + logit_bias=logit_bias, + logprobs=logprobs, + max_tokens=max_tokens, + n=n, + presence_penalty=presence_penalty, + seed=seed, + stop=stop, + stream=stream, + stream_options=stream_options, + temperature=temperature, + top_p=top_p, + user=user, + ) + return await self._get_openai_client().completions.create(**params) # type: ignore + + async def openai_chat_completion( + self, + model: str, + messages: list[OpenAIMessageParam], + frequency_penalty: float | None = None, + function_call: str | dict[str, Any] | None = None, + functions: list[dict[str, Any]] | None = None, + logit_bias: dict[str, float] | None = None, + logprobs: bool | None = None, + max_completion_tokens: int | None = None, + max_tokens: int | None = None, + n: int | None = None, + parallel_tool_calls: bool | None = None, + presence_penalty: float | None = None, + response_format: OpenAIResponseFormatParam | None = None, + seed: int | None = None, + stop: str | list[str] | None = None, + stream: bool | None = None, + stream_options: dict[str, Any] | None = None, + temperature: float | None = None, + tool_choice: str | dict[str, Any] | None = None, + tools: list[dict[str, Any]] | None = None, + top_logprobs: int | None = None, + top_p: float | None = None, + user: str | None = None, + ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]: + model_obj = await self.model_store.get_model(model) + params = await prepare_openai_completion_params( + model=model_obj.provider_resource_id, + messages=messages, + frequency_penalty=frequency_penalty, + function_call=function_call, + functions=functions, + logit_bias=logit_bias, + logprobs=logprobs, + max_completion_tokens=max_completion_tokens, + max_tokens=max_tokens, + n=n, + parallel_tool_calls=parallel_tool_calls, + presence_penalty=presence_penalty, + response_format=response_format, + seed=seed, + stop=stop, + stream=stream, + stream_options=stream_options, + temperature=temperature, + tool_choice=tool_choice, + tools=tools, + top_logprobs=top_logprobs, + top_p=top_p, + user=user, + ) + if params.get("stream", False): + return self._stream_openai_chat_completion(params) + return await self._get_openai_client().chat.completions.create(**params) # type: ignore + + async def _stream_openai_chat_completion(self, params: dict) -> AsyncGenerator: + # watsonx.ai sometimes adds usage data to the stream + include_usage = False + if params.get("stream_options", None): + include_usage = 
params["stream_options"].get("include_usage", False) + stream = await self._get_openai_client().chat.completions.create(**params) + + seen_finish_reason = False + async for chunk in stream: + # Final usage chunk with no choices that the user didn't request, so discard + if not include_usage and seen_finish_reason and len(chunk.choices) == 0: + break + yield chunk + for choice in chunk.choices: + if choice.finish_reason: + seen_finish_reason = True + break diff --git a/llama_stack/providers/remote/post_training/nvidia/README.md b/llama_stack/providers/remote/post_training/nvidia/README.md index 230587d66..3ef538d29 100644 --- a/llama_stack/providers/remote/post_training/nvidia/README.md +++ b/llama_stack/providers/remote/post_training/nvidia/README.md @@ -36,7 +36,6 @@ import os os.environ["NVIDIA_API_KEY"] = "your-api-key" os.environ["NVIDIA_CUSTOMIZER_URL"] = "http://nemo.test" -os.environ["NVIDIA_USER_ID"] = "llama-stack-user" os.environ["NVIDIA_DATASET_NAMESPACE"] = "default" os.environ["NVIDIA_PROJECT_ID"] = "test-project" os.environ["NVIDIA_OUTPUT_MODEL_DIR"] = "test-example-model@v1" @@ -125,6 +124,21 @@ client.post_training.job.cancel(job_uuid="your-job-id") ### Inference with the fine-tuned model +#### 1. Register the model + +```python +from llama_stack.apis.models import Model, ModelType + +client.models.register( + model_id="test-example-model@v1", + provider_id="nvidia", + provider_model_id="test-example-model@v1", + model_type=ModelType.llm, +) +``` + +#### 2. Inference with the fine-tuned model + ```python response = client.inference.completion( content="Complete the sentence using one word: Roses are red, violets are ", diff --git a/llama_stack/providers/remote/post_training/nvidia/config.py b/llama_stack/providers/remote/post_training/nvidia/config.py index 7b42c8bb0..fa08b6e3f 100644 --- a/llama_stack/providers/remote/post_training/nvidia/config.py +++ b/llama_stack/providers/remote/post_training/nvidia/config.py @@ -5,7 +5,7 @@ # the root directory of this source tree. 
import os -from typing import Any, Dict, Optional +from typing import Any from pydantic import BaseModel, Field @@ -15,23 +15,23 @@ from pydantic import BaseModel, Field class NvidiaPostTrainingConfig(BaseModel): """Configuration for NVIDIA Post Training implementation.""" - api_key: Optional[str] = Field( + api_key: str | None = Field( default_factory=lambda: os.getenv("NVIDIA_API_KEY"), description="The NVIDIA API key.", ) - dataset_namespace: Optional[str] = Field( + dataset_namespace: str | None = Field( default_factory=lambda: os.getenv("NVIDIA_DATASET_NAMESPACE", "default"), description="The NVIDIA dataset namespace.", ) - project_id: Optional[str] = Field( + project_id: str | None = Field( default_factory=lambda: os.getenv("NVIDIA_PROJECT_ID", "test-example-model@v1"), description="The NVIDIA project ID.", ) # ToDO: validate this, add default value - customizer_url: Optional[str] = Field( + customizer_url: str | None = Field( default_factory=lambda: os.getenv("NVIDIA_CUSTOMIZER_URL"), description="Base URL for the NeMo Customizer API", ) @@ -53,7 +53,7 @@ class NvidiaPostTrainingConfig(BaseModel): ) @classmethod - def sample_run_config(cls, **kwargs) -> Dict[str, Any]: + def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { "api_key": "${env.NVIDIA_API_KEY:}", "dataset_namespace": "${env.NVIDIA_DATASET_NAMESPACE:default}", @@ -71,27 +71,27 @@ class SFTLoRADefaultConfig(BaseModel): n_epochs: int = 50 # NeMo customizer specific parameters - log_every_n_steps: Optional[int] = None + log_every_n_steps: int | None = None val_check_interval: float = 0.25 sequence_packing_enabled: bool = False weight_decay: float = 0.01 lr: float = 0.0001 # SFT specific parameters - hidden_dropout: Optional[float] = None - attention_dropout: Optional[float] = None - ffn_dropout: Optional[float] = None + hidden_dropout: float | None = None + attention_dropout: float | None = None + ffn_dropout: float | None = None # LoRA default parameters lora_adapter_dim: int = 8 - lora_adapter_dropout: Optional[float] = None + lora_adapter_dropout: float | None = None lora_alpha: int = 16 # Data config batch_size: int = 8 @classmethod - def sample_config(cls) -> Dict[str, Any]: + def sample_config(cls) -> dict[str, Any]: """Return a sample configuration for NVIDIA training.""" return { "n_epochs": 50, diff --git a/llama_stack/providers/remote/post_training/nvidia/models.py b/llama_stack/providers/remote/post_training/nvidia/models.py index 7c696ac20..6a28f8af8 100644 --- a/llama_stack/providers/remote/post_training/nvidia/models.py +++ b/llama_stack/providers/remote/post_training/nvidia/models.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from typing import List from llama_stack.models.llama.sku_types import CoreModelId from llama_stack.providers.utils.inference.model_registry import ( @@ -16,9 +15,13 @@ _MODEL_ENTRIES = [ build_hf_repo_model_entry( "meta/llama-3.1-8b-instruct", CoreModelId.llama3_1_8b_instruct.value, - ) + ), + build_hf_repo_model_entry( + "meta/llama-3.2-1b-instruct", + CoreModelId.llama3_2_1b_instruct.value, + ), ] -def get_model_entries() -> List[ProviderModelEntry]: +def get_model_entries() -> list[ProviderModelEntry]: return _MODEL_ENTRIES diff --git a/llama_stack/providers/remote/post_training/nvidia/post_training.py b/llama_stack/providers/remote/post_training/nvidia/post_training.py index e14fcf0cc..d839ffd6f 100644 --- a/llama_stack/providers/remote/post_training/nvidia/post_training.py +++ b/llama_stack/providers/remote/post_training/nvidia/post_training.py @@ -5,7 +5,7 @@ # the root directory of this source tree. import warnings from datetime import datetime -from typing import Any, Dict, List, Literal, Optional +from typing import Any, Literal import aiohttp from pydantic import BaseModel, ConfigDict @@ -27,11 +27,12 @@ from .models import _MODEL_ENTRIES # Map API status to JobStatus enum STATUS_MAPPING = { - "running": "in_progress", - "completed": "completed", - "failed": "failed", - "cancelled": "cancelled", - "pending": "scheduled", + "running": JobStatus.in_progress.value, + "completed": JobStatus.completed.value, + "failed": JobStatus.failed.value, + "cancelled": JobStatus.cancelled.value, + "pending": JobStatus.scheduled.value, + "unknown": JobStatus.scheduled.value, } @@ -49,7 +50,7 @@ class NvidiaPostTrainingJob(PostTrainingJob): class ListNvidiaPostTrainingJobs(BaseModel): - data: List[NvidiaPostTrainingJob] + data: list[NvidiaPostTrainingJob] class NvidiaPostTrainingJobStatusResponse(PostTrainingJobStatusResponse): @@ -66,22 +67,27 @@ class NvidiaPostTrainingAdapter(ModelRegistryHelper): self.timeout = aiohttp.ClientTimeout(total=config.timeout) # TODO: filter by available models based on /config endpoint ModelRegistryHelper.__init__(self, model_entries=_MODEL_ENTRIES) - self.session = aiohttp.ClientSession(headers=self.headers, timeout=self.timeout) - self.customizer_url = config.customizer_url + self.session = None + self.customizer_url = config.customizer_url if not self.customizer_url: warnings.warn("Customizer URL is not set, using default value: http://nemo.test", stacklevel=2) self.customizer_url = "http://nemo.test" + async def _get_session(self) -> aiohttp.ClientSession: + if self.session is None or self.session.closed: + self.session = aiohttp.ClientSession(headers=self.headers, timeout=self.timeout) + return self.session + async def _make_request( self, method: str, path: str, - headers: Optional[Dict[str, Any]] = None, - params: Optional[Dict[str, Any]] = None, - json: Optional[Dict[str, Any]] = None, + headers: dict[str, Any] | None = None, + params: dict[str, Any] | None = None, + json: dict[str, Any] | None = None, **kwargs, - ) -> Dict[str, Any]: + ) -> dict[str, Any]: """Helper method to make HTTP requests to the Customizer API.""" url = f"{self.customizer_url}{path}" request_headers = self.headers.copy() @@ -93,8 +99,9 @@ class NvidiaPostTrainingAdapter(ModelRegistryHelper): if json and "Content-Type" not in request_headers: request_headers["Content-Type"] = "application/json" + session = await self._get_session() for _ in range(self.config.max_retries): - async with self.session.request(method, url, params=params, json=json, **kwargs) as response: + async with 
session.request(method, url, params=params, json=json, **kwargs) as response: if response.status >= 400: error_data = await response.json() raise Exception(f"API request failed: {error_data}") @@ -102,9 +109,9 @@ class NvidiaPostTrainingAdapter(ModelRegistryHelper): async def get_training_jobs( self, - page: Optional[int] = 1, - page_size: Optional[int] = 10, - sort: Optional[Literal["created_at", "-created_at"]] = "created_at", + page: int | None = 1, + page_size: int | None = 10, + sort: Literal["created_at", "-created_at"] | None = "created_at", ) -> ListNvidiaPostTrainingJobs: """Get all customization jobs. Updated the base class return type from ListPostTrainingJobsResponse to ListNvidiaPostTrainingJobs. @@ -121,8 +128,8 @@ class NvidiaPostTrainingAdapter(ModelRegistryHelper): jobs = [] for job in response.get("data", []): job_id = job.pop("id") - job_status = job.pop("status", "unknown").lower() - mapped_status = STATUS_MAPPING.get(job_status, "unknown") + job_status = job.pop("status", "scheduled").lower() + mapped_status = STATUS_MAPPING.get(job_status, "scheduled") # Convert string timestamps to datetime objects created_at = ( @@ -176,7 +183,7 @@ class NvidiaPostTrainingAdapter(ModelRegistryHelper): ) api_status = response.pop("status").lower() - mapped_status = STATUS_MAPPING.get(api_status, "unknown") + mapped_status = STATUS_MAPPING.get(api_status, "scheduled") return NvidiaPostTrainingJobStatusResponse( status=JobStatus(mapped_status), @@ -200,12 +207,12 @@ class NvidiaPostTrainingAdapter(ModelRegistryHelper): async def supervised_fine_tune( self, job_uuid: str, - training_config: Dict[str, Any], - hyperparam_search_config: Dict[str, Any], - logger_config: Dict[str, Any], + training_config: dict[str, Any], + hyperparam_search_config: dict[str, Any], + logger_config: dict[str, Any], model: str, - checkpoint_dir: Optional[str], - algorithm_config: Optional[AlgorithmConfig] = None, + checkpoint_dir: str | None, + algorithm_config: AlgorithmConfig | None = None, ) -> NvidiaPostTrainingJob: """ Fine-tunes a model on a dataset. @@ -217,7 +224,7 @@ class NvidiaPostTrainingAdapter(ModelRegistryHelper): Parameters: training_config: TrainingConfig - Configuration for training - model: str - Model identifier + model: str - NeMo Customizer configuration name algorithm_config: Optional[AlgorithmConfig] - Algorithm-specific configuration checkpoint_dir: Optional[str] - Directory containing model checkpoints, ignored atm job_uuid: str - Unique identifier for the job, ignored atm @@ -238,6 +245,7 @@ class NvidiaPostTrainingAdapter(ModelRegistryHelper): Supported models: - meta/llama-3.1-8b-instruct + - meta/llama-3.2-1b-instruct Supported algorithm configs: - LoRA, SFT @@ -283,10 +291,6 @@ class NvidiaPostTrainingAdapter(ModelRegistryHelper): - LoRA config: ## NeMo customizer specific LoRA parameters - - adapter_dim: int - Adapter dimension - Default: 8 (supports powers of 2) - - adapter_dropout: float - Adapter dropout - Default: None (0.0-1.0) - alpha: int - Scaling factor for the LoRA update Default: 16 Note: @@ -295,9 +299,6 @@ class NvidiaPostTrainingAdapter(ModelRegistryHelper): User is informed about unsupported parameters via warnings. 
""" - # Map model to nvidia model name - # ToDo: only supports llama-3.1-8b-instruct now, need to update this to support other models - nvidia_model = self.get_provider_model_id(model) # Check for unsupported method parameters unsupported_method_params = [] @@ -329,7 +330,7 @@ class NvidiaPostTrainingAdapter(ModelRegistryHelper): }, "data_config": {"dataset_id", "batch_size"}, "optimizer_config": {"lr", "weight_decay"}, - "lora_config": {"type", "adapter_dim", "adapter_dropout", "alpha"}, + "lora_config": {"type", "alpha"}, } # Validate all parameters at once @@ -343,7 +344,7 @@ class NvidiaPostTrainingAdapter(ModelRegistryHelper): # Prepare base job configuration job_config = { - "config": nvidia_model, + "config": model, "dataset": { "name": training_config["data_config"]["dataset_id"], "namespace": self.config.dataset_namespace, @@ -388,16 +389,10 @@ class NvidiaPostTrainingAdapter(ModelRegistryHelper): # Handle LoRA-specific configuration if algorithm_config: - if isinstance(algorithm_config, dict) and algorithm_config.get("type") == "LoRA": + if algorithm_config.type == "LoRA": warn_unsupported_params(algorithm_config, supported_params["lora_config"], "LoRA config") job_config["hyperparameters"]["lora"] = { - k: v - for k, v in { - "adapter_dim": algorithm_config.get("adapter_dim"), - "alpha": algorithm_config.get("alpha"), - "adapter_dropout": algorithm_config.get("adapter_dropout"), - }.items() - if v is not None + k: v for k, v in {"alpha": algorithm_config.alpha}.items() if v is not None } else: raise NotImplementedError(f"Unsupported algorithm config: {algorithm_config}") @@ -425,8 +420,8 @@ class NvidiaPostTrainingAdapter(ModelRegistryHelper): finetuned_model: str, algorithm_config: DPOAlignmentConfig, training_config: TrainingConfig, - hyperparam_search_config: Dict[str, Any], - logger_config: Dict[str, Any], + hyperparam_search_config: dict[str, Any], + logger_config: dict[str, Any], ) -> PostTrainingJob: """Optimize a model based on preference data.""" raise NotImplementedError("Preference optimization is not implemented yet") diff --git a/llama_stack/providers/remote/post_training/nvidia/utils.py b/llama_stack/providers/remote/post_training/nvidia/utils.py index ac47966af..d6e1016b2 100644 --- a/llama_stack/providers/remote/post_training/nvidia/utils.py +++ b/llama_stack/providers/remote/post_training/nvidia/utils.py @@ -6,7 +6,7 @@ import logging import warnings -from typing import Any, Dict, Set, Tuple +from typing import Any from pydantic import BaseModel @@ -18,7 +18,7 @@ from .config import NvidiaPostTrainingConfig logger = logging.getLogger(__name__) -def warn_unsupported_params(config_dict: Any, supported_keys: Set[str], config_name: str) -> None: +def warn_unsupported_params(config_dict: Any, supported_keys: set[str], config_name: str) -> None: keys = set(config_dict.__annotations__.keys()) if isinstance(config_dict, BaseModel) else config_dict.keys() unsupported_params = [k for k in keys if k not in supported_keys] if unsupported_params: @@ -28,7 +28,7 @@ def warn_unsupported_params(config_dict: Any, supported_keys: Set[str], config_n def validate_training_params( - training_config: Dict[str, Any], supported_keys: Set[str], config_name: str = "TrainingConfig" + training_config: dict[str, Any], supported_keys: set[str], config_name: str = "TrainingConfig" ) -> None: """ Validates training parameters against supported keys. 
@@ -57,7 +57,7 @@ def validate_training_params( # ToDo: implement post health checks for customizer are enabled -async def _get_health(url: str) -> Tuple[bool, bool]: ... +async def _get_health(url: str) -> tuple[bool, bool]: ... async def check_health(config: NvidiaPostTrainingConfig) -> None: ... diff --git a/llama_stack/providers/remote/safety/bedrock/bedrock.py b/llama_stack/providers/remote/safety/bedrock/bedrock.py index 2f960eead..c43b51073 100644 --- a/llama_stack/providers/remote/safety/bedrock/bedrock.py +++ b/llama_stack/providers/remote/safety/bedrock/bedrock.py @@ -6,7 +6,7 @@ import json import logging -from typing import Any, Dict, List +from typing import Any from llama_stack.apis.inference import Message from llama_stack.apis.safety import ( @@ -53,7 +53,7 @@ class BedrockSafetyAdapter(Safety, ShieldsProtocolPrivate): ) async def run_shield( - self, shield_id: str, messages: List[Message], params: Dict[str, Any] = None + self, shield_id: str, messages: list[Message], params: dict[str, Any] = None ) -> RunShieldResponse: shield = await self.shield_store.get_shield(shield_id) if not shield: diff --git a/llama_stack/providers/remote/safety/nvidia/README.md b/llama_stack/providers/remote/safety/nvidia/README.md new file mode 100644 index 000000000..434db32fb --- /dev/null +++ b/llama_stack/providers/remote/safety/nvidia/README.md @@ -0,0 +1,77 @@ +# NVIDIA Safety Provider for LlamaStack + +This provider enables safety checks and guardrails for LLM interactions using NVIDIA's NeMo Guardrails service. + +## Features + +- Run safety checks for messages + +## Getting Started + +### Prerequisites + +- LlamaStack with NVIDIA configuration +- Access to NVIDIA NeMo Guardrails service +- NIM for model to use for safety check is deployed + +### Setup + +Build the NVIDIA environment: + +```bash +llama stack build --template nvidia --image-type conda +``` + +### Basic Usage using the LlamaStack Python Client + +#### Initialize the client + +```python +import os + +os.environ["NVIDIA_API_KEY"] = "your-api-key" +os.environ["NVIDIA_GUARDRAILS_URL"] = "http://guardrails.test" + +from llama_stack.distribution.library_client import LlamaStackAsLibraryClient + +client = LlamaStackAsLibraryClient("nvidia") +client.initialize() +``` + +#### Create a safety shield + +```python +from llama_stack.apis.safety import Shield +from llama_stack.apis.inference import Message + +# Create a safety shield +shield = Shield( + shield_id="your-shield-id", + provider_resource_id="safety-model-id", # The model to use for safety checks + description="Safety checks for content moderation", +) + +# Register the shield +await client.safety.register_shield(shield) +``` + +#### Run safety checks + +```python +# Messages to check +messages = [Message(role="user", content="Your message to check")] + +# Run safety check +response = await client.safety.run_shield( + shield_id="your-shield-id", + messages=messages, +) + +# Check for violations +if response.violation: + print(f"Safety violation detected: {response.violation.user_message}") + print(f"Violation level: {response.violation.violation_level}") + print(f"Metadata: {response.violation.metadata}") +else: + print("No safety violations detected") +``` diff --git a/llama_stack/providers/remote/safety/nvidia/config.py b/llama_stack/providers/remote/safety/nvidia/config.py index 3df80ed4f..4ca703a4d 100644 --- a/llama_stack/providers/remote/safety/nvidia/config.py +++ b/llama_stack/providers/remote/safety/nvidia/config.py @@ -4,7 +4,7 @@ # This source code is licensed 
under the terms described in the LICENSE file in # the root directory of this source tree. import os -from typing import Any, Dict, Optional +from typing import Any from pydantic import BaseModel, Field @@ -27,10 +27,10 @@ class NVIDIASafetyConfig(BaseModel): default_factory=lambda: os.getenv("GUARDRAILS_SERVICE_URL", "http://0.0.0.0:7331"), description="The url for accessing the guardrails service", ) - config_id: Optional[str] = Field(default="self-check", description="Config ID to use from the config store") + config_id: str | None = Field(default="self-check", description="Config ID to use from the config store") @classmethod - def sample_run_config(cls, **kwargs) -> Dict[str, Any]: + def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { "guardrails_service_url": "${env.GUARDRAILS_SERVICE_URL:http://localhost:7331}", "config_id": "self-check", diff --git a/llama_stack/providers/remote/safety/nvidia/nvidia.py b/llama_stack/providers/remote/safety/nvidia/nvidia.py index 1ff4a6ad9..411badb1c 100644 --- a/llama_stack/providers/remote/safety/nvidia/nvidia.py +++ b/llama_stack/providers/remote/safety/nvidia/nvidia.py @@ -5,15 +5,15 @@ # the root directory of this source tree. import logging -from typing import Any, List, Optional +from typing import Any import requests from llama_stack.apis.inference import Message from llama_stack.apis.safety import RunShieldResponse, Safety, SafetyViolation, ViolationLevel from llama_stack.apis.shields import Shield -from llama_stack.distribution.library_client import convert_pydantic_to_json_value from llama_stack.providers.datatypes import ShieldsProtocolPrivate +from llama_stack.providers.utils.inference.openai_compat import convert_message_to_openai_dict_new from .config import NVIDIASafetyConfig @@ -28,7 +28,6 @@ class NVIDIASafetyAdapter(Safety, ShieldsProtocolPrivate): Args: config (NVIDIASafetyConfig): The configuration containing the guardrails service URL and config ID. """ - print(f"Initializing NVIDIASafetyAdapter({config.guardrails_service_url})...") self.config = config async def initialize(self) -> None: @@ -42,7 +41,7 @@ class NVIDIASafetyAdapter(Safety, ShieldsProtocolPrivate): raise ValueError("Shield model not provided.") async def run_shield( - self, shield_id: str, messages: List[Message], params: Optional[dict[str, Any]] = None + self, shield_id: str, messages: list[Message], params: dict[str, Any] | None = None ) -> RunShieldResponse: """ Run a safety shield check against the provided messages. @@ -113,7 +112,7 @@ class NeMoGuardrails: response.raise_for_status() return response.json() - async def run(self, messages: List[Message]) -> RunShieldResponse: + async def run(self, messages: list[Message]) -> RunShieldResponse: """ Queries the /v1/guardrails/checks endpoint of the NeMo guardrails deployed API. @@ -127,9 +126,10 @@ class NeMoGuardrails: Raises: requests.HTTPError: If the POST request fails. """ + request_messages = [await convert_message_to_openai_dict_new(message) for message in messages] request_data = { "model": self.model, - "messages": convert_pydantic_to_json_value(messages), + "messages": request_messages, "temperature": self.temperature, "top_p": 1, "frequency_penalty": 0, diff --git a/llama_stack/providers/remote/safety/sambanova/__init__.py b/llama_stack/providers/remote/safety/sambanova/__init__.py new file mode 100644 index 000000000..bb9d15374 --- /dev/null +++ b/llama_stack/providers/remote/safety/sambanova/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
+# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + + +from typing import Any + +from .config import SambaNovaSafetyConfig + + +async def get_adapter_impl(config: SambaNovaSafetyConfig, _deps) -> Any: + from .sambanova import SambaNovaSafetyAdapter + + impl = SambaNovaSafetyAdapter(config) + await impl.initialize() + return impl diff --git a/llama_stack/providers/remote/safety/sambanova/config.py b/llama_stack/providers/remote/safety/sambanova/config.py new file mode 100644 index 000000000..383cea244 --- /dev/null +++ b/llama_stack/providers/remote/safety/sambanova/config.py @@ -0,0 +1,37 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Any + +from pydantic import BaseModel, Field, SecretStr + +from llama_stack.schema_utils import json_schema_type + + +class SambaNovaProviderDataValidator(BaseModel): + sambanova_api_key: str | None = Field( + default=None, + description="Sambanova Cloud API key", + ) + + +@json_schema_type +class SambaNovaSafetyConfig(BaseModel): + url: str = Field( + default="https://api.sambanova.ai/v1", + description="The URL for the SambaNova AI server", + ) + api_key: SecretStr | None = Field( + default=None, + description="The SambaNova cloud API Key", + ) + + @classmethod + def sample_run_config(cls, api_key: str = "${env.SAMBANOVA_API_KEY}", **kwargs) -> dict[str, Any]: + return { + "url": "https://api.sambanova.ai/v1", + "api_key": api_key, + } diff --git a/llama_stack/providers/remote/safety/sambanova/sambanova.py b/llama_stack/providers/remote/safety/sambanova/sambanova.py new file mode 100644 index 000000000..84c8267ae --- /dev/null +++ b/llama_stack/providers/remote/safety/sambanova/sambanova.py @@ -0,0 +1,100 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import json +import logging +from typing import Any + +import litellm +import requests + +from llama_stack.apis.inference import Message +from llama_stack.apis.safety import ( + RunShieldResponse, + Safety, + SafetyViolation, + ViolationLevel, +) +from llama_stack.apis.shields import Shield +from llama_stack.distribution.request_headers import NeedsRequestProviderData +from llama_stack.providers.datatypes import ShieldsProtocolPrivate +from llama_stack.providers.utils.inference.openai_compat import convert_message_to_openai_dict_new + +from .config import SambaNovaSafetyConfig + +logger = logging.getLogger(__name__) + +CANNED_RESPONSE_TEXT = "I can't answer that. Can I help with something else?" 
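The adapter below treats the guard model's reply as a verdict: when the text contains "unsafe", the category code is taken from the reply's last line and the canned response above is returned to the user. A minimal sketch of that interpretation, using an assumed example reply (the exact output format of the guard model is an assumption, not specified in this patch):

```python
# Assumed example reply from a Llama Guard style model: a verdict line
# followed by a category code on the last line.
shield_message = "unsafe\nS8"

if "unsafe" in shield_message.lower():
    violation_type = shield_message.split("\n")[-1]  # -> "S8"
    print(f"violation detected, type={violation_type}")
else:
    print("no violation detected")
```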
+ + +class SambaNovaSafetyAdapter(Safety, ShieldsProtocolPrivate, NeedsRequestProviderData): + def __init__(self, config: SambaNovaSafetyConfig) -> None: + self.config = config + + async def initialize(self) -> None: + pass + + async def shutdown(self) -> None: + pass + + def _get_api_key(self) -> str: + config_api_key = self.config.api_key if self.config.api_key else None + if config_api_key: + return config_api_key.get_secret_value() + else: + provider_data = self.get_request_provider_data() + if provider_data is None or not provider_data.sambanova_api_key: + raise ValueError( + 'Pass Sambanova API Key in the header X-LlamaStack-Provider-Data as { "sambanova_api_key": }' + ) + return provider_data.sambanova_api_key + + async def register_shield(self, shield: Shield) -> None: + list_models_url = self.config.url + "/models" + try: + response = requests.get(list_models_url) + response.raise_for_status() + except requests.exceptions.RequestException as e: + raise RuntimeError(f"Request to {list_models_url} failed") from e + available_models = [model.get("id") for model in response.json().get("data", {})] + if ( + len(available_models) == 0 + or "guard" not in shield.provider_resource_id.lower() + or shield.provider_resource_id.split("sambanova/")[-1] not in available_models + ): + raise ValueError(f"Shield {shield.provider_resource_id} not found in SambaNova") + + async def run_shield( + self, shield_id: str, messages: list[Message], params: dict[str, Any] | None = None + ) -> RunShieldResponse: + shield = await self.shield_store.get_shield(shield_id) + if not shield: + raise ValueError(f"Shield {shield_id} not found") + + shield_params = shield.params + logger.debug(f"run_shield::{shield_params}::messages={messages}") + content_messages = [await convert_message_to_openai_dict_new(m) for m in messages] + logger.debug(f"run_shield::final:messages::{json.dumps(content_messages, indent=2)}:") + + response = litellm.completion( + model=shield.provider_resource_id, messages=content_messages, api_key=self._get_api_key() + ) + shield_message = response.choices[0].message.content + + if "unsafe" in shield_message.lower(): + user_message = CANNED_RESPONSE_TEXT + violation_type = shield_message.split("\n")[-1] + metadata = {"violation_type": violation_type} + + return RunShieldResponse( + violation=SafetyViolation( + user_message=user_message, + violation_level=ViolationLevel.ERROR, + metadata=metadata, + ) + ) + + return RunShieldResponse() diff --git a/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py b/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py index b34c9fd9d..7e82cb6d4 100644 --- a/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +++ b/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py @@ -5,26 +5,26 @@ # the root directory of this source tree. 
import json -from typing import Any, Dict, Optional +from typing import Any import httpx from llama_stack.apis.common.content_types import URL from llama_stack.apis.tools import ( ListToolDefsResponse, - Tool, ToolDef, + ToolGroup, ToolInvocationResult, ToolParameter, ToolRuntime, ) from llama_stack.distribution.request_headers import NeedsRequestProviderData -from llama_stack.providers.datatypes import ToolsProtocolPrivate +from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate from .config import BingSearchToolConfig -class BingSearchToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime, NeedsRequestProviderData): +class BingSearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsRequestProviderData): def __init__(self, config: BingSearchToolConfig): self.config = config self.url = "https://api.bing.microsoft.com/v7.0/search" @@ -32,10 +32,10 @@ class BingSearchToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime, NeedsRequestP async def initialize(self): pass - async def register_tool(self, tool: Tool) -> None: + async def register_toolgroup(self, toolgroup: ToolGroup) -> None: pass - async def unregister_tool(self, tool_id: str) -> None: + async def unregister_toolgroup(self, toolgroup_id: str) -> None: return def _get_api_key(self) -> str: @@ -50,7 +50,7 @@ class BingSearchToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime, NeedsRequestP return provider_data.bing_search_api_key async def list_runtime_tools( - self, tool_group_id: Optional[str] = None, mcp_endpoint: Optional[URL] = None + self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None ) -> ListToolDefsResponse: return ListToolDefsResponse( data=[ @@ -68,7 +68,7 @@ class BingSearchToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime, NeedsRequestP ] ) - async def invoke_tool(self, tool_name: str, kwargs: Dict[str, Any]) -> ToolInvocationResult: + async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: api_key = self._get_api_key() headers = { "Ocp-Apim-Subscription-Key": api_key, diff --git a/llama_stack/providers/remote/tool_runtime/bing_search/config.py b/llama_stack/providers/remote/tool_runtime/bing_search/config.py index 4f089439f..30269dbc1 100644 --- a/llama_stack/providers/remote/tool_runtime/bing_search/config.py +++ b/llama_stack/providers/remote/tool_runtime/bing_search/config.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict, Optional +from typing import Any from pydantic import BaseModel @@ -12,11 +12,11 @@ from pydantic import BaseModel class BingSearchToolConfig(BaseModel): """Configuration for Bing Search Tool Runtime""" - api_key: Optional[str] = None + api_key: str | None = None top_k: int = 3 @classmethod - def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> Dict[str, Any]: + def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: return { "api_key": "${env.BING_API_KEY:}", } diff --git a/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py b/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py index 41f3ce823..b96b9e59c 100644 --- a/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +++ b/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py @@ -4,37 +4,37 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from typing import Any, Dict, Optional +from typing import Any import httpx from llama_stack.apis.common.content_types import URL from llama_stack.apis.tools import ( ListToolDefsResponse, - Tool, ToolDef, + ToolGroup, ToolInvocationResult, ToolParameter, ToolRuntime, ) from llama_stack.distribution.request_headers import NeedsRequestProviderData from llama_stack.models.llama.datatypes import BuiltinTool -from llama_stack.providers.datatypes import ToolsProtocolPrivate +from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate from .config import BraveSearchToolConfig -class BraveSearchToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime, NeedsRequestProviderData): +class BraveSearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsRequestProviderData): def __init__(self, config: BraveSearchToolConfig): self.config = config async def initialize(self): pass - async def register_tool(self, tool: Tool) -> None: + async def register_toolgroup(self, toolgroup: ToolGroup) -> None: pass - async def unregister_tool(self, tool_id: str) -> None: + async def unregister_toolgroup(self, toolgroup_id: str) -> None: return def _get_api_key(self) -> str: @@ -49,7 +49,7 @@ class BraveSearchToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime, NeedsRequest return provider_data.brave_search_api_key async def list_runtime_tools( - self, tool_group_id: Optional[str] = None, mcp_endpoint: Optional[URL] = None + self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None ) -> ListToolDefsResponse: return ListToolDefsResponse( data=[ @@ -68,7 +68,7 @@ class BraveSearchToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime, NeedsRequest ] ) - async def invoke_tool(self, tool_name: str, kwargs: Dict[str, Any]) -> ToolInvocationResult: + async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: api_key = self._get_api_key() url = "https://api.search.brave.com/res/v1/web/search" headers = { diff --git a/llama_stack/providers/remote/tool_runtime/brave_search/config.py b/llama_stack/providers/remote/tool_runtime/brave_search/config.py index ab6053609..37ba21304 100644 --- a/llama_stack/providers/remote/tool_runtime/brave_search/config.py +++ b/llama_stack/providers/remote/tool_runtime/brave_search/config.py @@ -4,13 +4,13 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict, Optional +from typing import Any from pydantic import BaseModel, Field class BraveSearchToolConfig(BaseModel): - api_key: Optional[str] = Field( + api_key: str | None = Field( default=None, description="The Brave Search API Key", ) @@ -20,7 +20,7 @@ class BraveSearchToolConfig(BaseModel): ) @classmethod - def sample_run_config(cls, __distro_dir__: str) -> Dict[str, Any]: + def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { "api_key": "${env.BRAVE_SEARCH_API_KEY:}", "max_results": 3, diff --git a/llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py b/llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py index fb1f558e5..051a880a7 100644 --- a/llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py +++ b/llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py @@ -4,18 +4,12 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from pydantic import BaseModel - -from .config import ModelContextProtocolConfig +from .config import MCPProviderConfig -class ModelContextProtocolToolProviderDataValidator(BaseModel): - api_key: str - - -async def get_adapter_impl(config: ModelContextProtocolConfig, _deps): +async def get_adapter_impl(config: MCPProviderConfig, _deps): from .model_context_protocol import ModelContextProtocolToolRuntimeImpl - impl = ModelContextProtocolToolRuntimeImpl(config) + impl = ModelContextProtocolToolRuntimeImpl(config, _deps) await impl.initialize() return impl diff --git a/llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py b/llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py index 30ac407bc..b8c5e77fd 100644 --- a/llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +++ b/llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py @@ -4,12 +4,17 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict +from typing import Any from pydantic import BaseModel -class ModelContextProtocolConfig(BaseModel): +class MCPProviderDataValidator(BaseModel): + # mcp_endpoint => dict of headers to send + mcp_headers: dict[str, dict[str, str]] | None = None + + +class MCPProviderConfig(BaseModel): @classmethod - def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> Dict[str, Any]: + def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: return {} diff --git a/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py b/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py index 676917225..a9b252dfe 100644 --- a/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +++ b/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py @@ -4,66 +4,50 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from typing import Any, Dict, Optional +from typing import Any from urllib.parse import urlparse -from mcp import ClientSession -from mcp.client.sse import sse_client - from llama_stack.apis.common.content_types import URL +from llama_stack.apis.datatypes import Api from llama_stack.apis.tools import ( ListToolDefsResponse, - ToolDef, + ToolGroup, ToolInvocationResult, - ToolParameter, ToolRuntime, ) -from llama_stack.providers.datatypes import ToolsProtocolPrivate +from llama_stack.distribution.request_headers import NeedsRequestProviderData +from llama_stack.log import get_logger +from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate +from llama_stack.providers.utils.tools.mcp import invoke_mcp_tool, list_mcp_tools -from .config import ModelContextProtocolConfig +from .config import MCPProviderConfig + +logger = get_logger(__name__, category="tools") -class ModelContextProtocolToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime): - def __init__(self, config: ModelContextProtocolConfig): +class ModelContextProtocolToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsRequestProviderData): + def __init__(self, config: MCPProviderConfig, _deps: dict[Api, Any]): self.config = config async def initialize(self): pass + async def register_toolgroup(self, toolgroup: ToolGroup) -> None: + pass + + async def unregister_toolgroup(self, toolgroup_id: str) -> None: + return + async def list_runtime_tools( - self, tool_group_id: Optional[str] = None, mcp_endpoint: Optional[URL] = None + self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None ) -> ListToolDefsResponse: + # this endpoint should be retrieved by getting the tool group right? if mcp_endpoint is None: raise ValueError("mcp_endpoint is required") + headers = await self.get_headers_from_request(mcp_endpoint.uri) + return await list_mcp_tools(mcp_endpoint.uri, headers) - tools = [] - async with sse_client(mcp_endpoint.uri) as streams: - async with ClientSession(*streams) as session: - await session.initialize() - tools_result = await session.list_tools() - for tool in tools_result.tools: - parameters = [] - for param_name, param_schema in tool.inputSchema.get("properties", {}).items(): - parameters.append( - ToolParameter( - name=param_name, - parameter_type=param_schema.get("type", "string"), - description=param_schema.get("description", ""), - ) - ) - tools.append( - ToolDef( - name=tool.name, - description=tool.description, - parameters=parameters, - metadata={ - "endpoint": mcp_endpoint.uri, - }, - ) - ) - return ListToolDefsResponse(data=tools) - - async def invoke_tool(self, tool_name: str, kwargs: Dict[str, Any]) -> ToolInvocationResult: + async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: tool = await self.tool_store.get_tool(tool_name) if tool.metadata is None or tool.metadata.get("endpoint") is None: raise ValueError(f"Tool {tool_name} does not have metadata") @@ -71,12 +55,19 @@ class ModelContextProtocolToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime): if urlparse(endpoint).scheme not in ("http", "https"): raise ValueError(f"Endpoint {endpoint} is not a valid HTTP(S) URL") - async with sse_client(endpoint) as streams: - async with ClientSession(*streams) as session: - await session.initialize() - result = await session.call_tool(tool.identifier, kwargs) + headers = await self.get_headers_from_request(endpoint) + return await invoke_mcp_tool(endpoint, headers, tool_name, kwargs) - return ToolInvocationResult( - 
content="\n".join([result.model_dump_json() for result in result.content]), - error_code=1 if result.isError else 0, - ) + async def get_headers_from_request(self, mcp_endpoint_uri: str) -> dict[str, str]: + def canonicalize_uri(uri: str) -> str: + return f"{urlparse(uri).netloc or ''}/{urlparse(uri).path or ''}" + + headers = {} + + provider_data = self.get_request_provider_data() + if provider_data and provider_data.mcp_headers: + for uri, values in provider_data.mcp_headers.items(): + if canonicalize_uri(uri) != canonicalize_uri(mcp_endpoint_uri): + continue + headers.update(values) + return headers diff --git a/llama_stack/providers/remote/tool_runtime/tavily_search/config.py b/llama_stack/providers/remote/tool_runtime/tavily_search/config.py index 945430bb1..c9b18d30d 100644 --- a/llama_stack/providers/remote/tool_runtime/tavily_search/config.py +++ b/llama_stack/providers/remote/tool_runtime/tavily_search/config.py @@ -4,13 +4,13 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict, Optional +from typing import Any from pydantic import BaseModel, Field class TavilySearchToolConfig(BaseModel): - api_key: Optional[str] = Field( + api_key: str | None = Field( default=None, description="The Tavily Search API Key", ) @@ -20,7 +20,7 @@ class TavilySearchToolConfig(BaseModel): ) @classmethod - def sample_run_config(cls, __distro_dir__: str) -> Dict[str, Any]: + def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { "api_key": "${env.TAVILY_SEARCH_API_KEY:}", "max_results": 3, diff --git a/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py b/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py index 719d6be14..1fe91fd7f 100644 --- a/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +++ b/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py @@ -5,36 +5,36 @@ # the root directory of this source tree. 
import json -from typing import Any, Dict, Optional +from typing import Any import httpx from llama_stack.apis.common.content_types import URL from llama_stack.apis.tools import ( ListToolDefsResponse, - Tool, ToolDef, + ToolGroup, ToolInvocationResult, ToolParameter, ToolRuntime, ) from llama_stack.distribution.request_headers import NeedsRequestProviderData -from llama_stack.providers.datatypes import ToolsProtocolPrivate +from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate from .config import TavilySearchToolConfig -class TavilySearchToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime, NeedsRequestProviderData): +class TavilySearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsRequestProviderData): def __init__(self, config: TavilySearchToolConfig): self.config = config async def initialize(self): pass - async def register_tool(self, tool: Tool) -> None: + async def register_toolgroup(self, toolgroup: ToolGroup) -> None: pass - async def unregister_tool(self, tool_id: str) -> None: + async def unregister_toolgroup(self, toolgroup_id: str) -> None: return def _get_api_key(self) -> str: @@ -49,7 +49,7 @@ class TavilySearchToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime, NeedsReques return provider_data.tavily_search_api_key async def list_runtime_tools( - self, tool_group_id: Optional[str] = None, mcp_endpoint: Optional[URL] = None + self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None ) -> ListToolDefsResponse: return ListToolDefsResponse( data=[ @@ -67,7 +67,7 @@ class TavilySearchToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime, NeedsReques ] ) - async def invoke_tool(self, tool_name: str, kwargs: Dict[str, Any]) -> ToolInvocationResult: + async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: api_key = self._get_api_key() async with httpx.AsyncClient() as client: response = await client.post( diff --git a/llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py b/llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py index 8ea49c7b5..aefc86bd6 100644 --- a/llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py +++ b/llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict, Optional +from typing import Any from pydantic import BaseModel @@ -12,10 +12,10 @@ from pydantic import BaseModel class WolframAlphaToolConfig(BaseModel): """Configuration for WolframAlpha Tool Runtime""" - api_key: Optional[str] = None + api_key: str | None = None @classmethod - def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> Dict[str, Any]: + def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: return { "api_key": "${env.WOLFRAM_ALPHA_API_KEY:}", } diff --git a/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py b/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py index b3e0e120c..6e1d0f61d 100644 --- a/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +++ b/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py @@ -5,26 +5,26 @@ # the root directory of this source tree. 
import json -from typing import Any, Dict, Optional +from typing import Any import httpx from llama_stack.apis.common.content_types import URL from llama_stack.apis.tools import ( ListToolDefsResponse, - Tool, ToolDef, + ToolGroup, ToolInvocationResult, ToolParameter, ToolRuntime, ) from llama_stack.distribution.request_headers import NeedsRequestProviderData -from llama_stack.providers.datatypes import ToolsProtocolPrivate +from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate from .config import WolframAlphaToolConfig -class WolframAlphaToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime, NeedsRequestProviderData): +class WolframAlphaToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsRequestProviderData): def __init__(self, config: WolframAlphaToolConfig): self.config = config self.url = "https://api.wolframalpha.com/v2/query" @@ -32,10 +32,10 @@ class WolframAlphaToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime, NeedsReques async def initialize(self): pass - async def register_tool(self, tool: Tool) -> None: + async def register_toolgroup(self, toolgroup: ToolGroup) -> None: pass - async def unregister_tool(self, tool_id: str) -> None: + async def unregister_toolgroup(self, toolgroup_id: str) -> None: return def _get_api_key(self) -> str: @@ -50,7 +50,7 @@ class WolframAlphaToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime, NeedsReques return provider_data.wolfram_alpha_api_key async def list_runtime_tools( - self, tool_group_id: Optional[str] = None, mcp_endpoint: Optional[URL] = None + self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None ) -> ListToolDefsResponse: return ListToolDefsResponse( data=[ @@ -68,7 +68,7 @@ class WolframAlphaToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime, NeedsReques ] ) - async def invoke_tool(self, tool_name: str, kwargs: Dict[str, Any]) -> ToolInvocationResult: + async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: api_key = self._get_api_key() params = { "input": kwargs["query"], diff --git a/llama_stack/providers/remote/vector_io/chroma/__init__.py b/llama_stack/providers/remote/vector_io/chroma/__init__.py index 8646b04d6..ebbc62b1c 100644 --- a/llama_stack/providers/remote/vector_io/chroma/__init__.py +++ b/llama_stack/providers/remote/vector_io/chroma/__init__.py @@ -4,14 +4,12 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from typing import Dict - from llama_stack.providers.datatypes import Api, ProviderSpec from .config import ChromaVectorIOConfig -async def get_adapter_impl(config: ChromaVectorIOConfig, deps: Dict[Api, ProviderSpec]): +async def get_adapter_impl(config: ChromaVectorIOConfig, deps: dict[Api, ProviderSpec]): from .chroma import ChromaVectorIOAdapter impl = ChromaVectorIOAdapter(config, deps[Api.inference]) diff --git a/llama_stack/providers/remote/vector_io/chroma/chroma.py b/llama_stack/providers/remote/vector_io/chroma/chroma.py index 3bf3a7740..a59a38573 100644 --- a/llama_stack/providers/remote/vector_io/chroma/chroma.py +++ b/llama_stack/providers/remote/vector_io/chroma/chroma.py @@ -6,7 +6,7 @@ import asyncio import json import logging -from typing import Any, Dict, List, Optional, Union +from typing import Any from urllib.parse import urlparse import chromadb @@ -26,8 +26,7 @@ from .config import ChromaVectorIOConfig as RemoteChromaVectorIOConfig log = logging.getLogger(__name__) - -ChromaClientType = Union[chromadb.AsyncHttpClient, chromadb.PersistentClient] +ChromaClientType = chromadb.api.AsyncClientAPI | chromadb.api.ClientAPI # this is a helper to allow us to use async and non-async chroma clients interchangeably @@ -42,7 +41,7 @@ class ChromaIndex(EmbeddingIndex): self.client = client self.collection = collection - async def add_chunks(self, chunks: List[Chunk], embeddings: NDArray): + async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray): assert len(chunks) == len(embeddings), ( f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}" ) @@ -85,11 +84,19 @@ class ChromaIndex(EmbeddingIndex): async def delete(self): await maybe_await(self.client.delete_collection(self.collection.name)) + async def query_keyword( + self, + query_string: str, + k: int, + score_threshold: float, + ) -> QueryChunksResponse: + raise NotImplementedError("Keyword search is not supported in Chroma") + class ChromaVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate): def __init__( self, - config: Union[RemoteChromaVectorIOConfig, InlineChromaVectorIOConfig], + config: RemoteChromaVectorIOConfig | InlineChromaVectorIOConfig, inference_api: Api.inference, ) -> None: log.info(f"Initializing ChromaVectorIOAdapter with url: {config}") @@ -137,8 +144,8 @@ class ChromaVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate): async def insert_chunks( self, vector_db_id: str, - chunks: List[Chunk], - ttl_seconds: Optional[int] = None, + chunks: list[Chunk], + ttl_seconds: int | None = None, ) -> None: index = await self._get_and_cache_vector_db_index(vector_db_id) @@ -148,7 +155,7 @@ class ChromaVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate): self, vector_db_id: str, query: InterleavedContent, - params: Optional[Dict[str, Any]] = None, + params: dict[str, Any] | None = None, ) -> QueryChunksResponse: index = await self._get_and_cache_vector_db_index(vector_db_id) diff --git a/llama_stack/providers/remote/vector_io/chroma/config.py b/llama_stack/providers/remote/vector_io/chroma/config.py index 3e2463252..4e893fab4 100644 --- a/llama_stack/providers/remote/vector_io/chroma/config.py +++ b/llama_stack/providers/remote/vector_io/chroma/config.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
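The Chroma changes above, and the Milvus, PGVector, Qdrant, and Weaviate changes below, all split the former EmbeddingIndex.query() into query_vector() plus a new query_keyword() hook that these remote backends do not implement yet. A condensed sketch of the shared pattern (illustrative, mirroring the diffs rather than adding behaviour):

    from numpy.typing import NDArray

    class ExampleRemoteIndex(EmbeddingIndex):  # EmbeddingIndex/QueryChunksResponse as imported in the adapters
        async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse:
            ...  # backend-specific vector search, one implementation per adapter

        async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse:
            raise NotImplementedError("Keyword search is not supported by this backend")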
-from typing import Any, Dict +from typing import Any from pydantic import BaseModel @@ -13,5 +13,5 @@ class ChromaVectorIOConfig(BaseModel): url: str @classmethod - def sample_run_config(cls, url: str = "${env.CHROMADB_URL}", **kwargs: Any) -> Dict[str, Any]: + def sample_run_config(cls, url: str = "${env.CHROMADB_URL}", **kwargs: Any) -> dict[str, Any]: return {"url": url} diff --git a/llama_stack/providers/remote/vector_io/milvus/__init__.py b/llama_stack/providers/remote/vector_io/milvus/__init__.py index 84cb1d748..92dbfda2e 100644 --- a/llama_stack/providers/remote/vector_io/milvus/__init__.py +++ b/llama_stack/providers/remote/vector_io/milvus/__init__.py @@ -4,14 +4,12 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Dict - from llama_stack.providers.datatypes import Api, ProviderSpec from .config import MilvusVectorIOConfig -async def get_adapter_impl(config: MilvusVectorIOConfig, deps: Dict[Api, ProviderSpec]): +async def get_adapter_impl(config: MilvusVectorIOConfig, deps: dict[Api, ProviderSpec]): from .milvus import MilvusVectorIOAdapter assert isinstance(config, MilvusVectorIOConfig), f"Unexpected config type: {type(config)}" diff --git a/llama_stack/providers/remote/vector_io/milvus/config.py b/llama_stack/providers/remote/vector_io/milvus/config.py index 17da6b23d..9bdc7ed5c 100644 --- a/llama_stack/providers/remote/vector_io/milvus/config.py +++ b/llama_stack/providers/remote/vector_io/milvus/config.py @@ -4,9 +4,9 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict, Optional +from typing import Any -from pydantic import BaseModel +from pydantic import BaseModel, ConfigDict from llama_stack.schema_utils import json_schema_type @@ -14,9 +14,11 @@ from llama_stack.schema_utils import json_schema_type @json_schema_type class MilvusVectorIOConfig(BaseModel): uri: str - token: Optional[str] = None + token: str | None = None consistency_level: str = "Strong" + model_config = ConfigDict(extra="allow") + @classmethod - def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> Dict[str, Any]: + def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: return {"uri": "${env.MILVUS_ENDPOINT}", "token": "${env.MILVUS_TOKEN}"} diff --git a/llama_stack/providers/remote/vector_io/milvus/milvus.py b/llama_stack/providers/remote/vector_io/milvus/milvus.py index 1949d293d..6628292db 100644 --- a/llama_stack/providers/remote/vector_io/milvus/milvus.py +++ b/llama_stack/providers/remote/vector_io/milvus/milvus.py @@ -9,7 +9,7 @@ import hashlib import logging import os import uuid -from typing import Any, Dict, List, Optional, Union +from typing import Any from numpy.typing import NDArray from pymilvus import MilvusClient @@ -39,7 +39,7 @@ class MilvusIndex(EmbeddingIndex): if await asyncio.to_thread(self.client.has_collection, self.collection_name): await asyncio.to_thread(self.client.drop_collection, collection_name=self.collection_name) - async def add_chunks(self, chunks: List[Chunk], embeddings: NDArray): + async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray): assert len(chunks) == len(embeddings), ( f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}" ) @@ -73,7 +73,7 @@ class MilvusIndex(EmbeddingIndex): logger.error(f"Error inserting chunks into Milvus collection {self.collection_name}: {e}") raise e - async def 
query(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse: + async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse: search_res = await asyncio.to_thread( self.client.search, collection_name=self.collection_name, @@ -86,10 +86,18 @@ class MilvusIndex(EmbeddingIndex): scores = [res["distance"] for res in search_res[0]] return QueryChunksResponse(chunks=chunks, scores=scores) + async def query_keyword( + self, + query_string: str, + k: int, + score_threshold: float, + ) -> QueryChunksResponse: + raise NotImplementedError("Keyword search is not supported in Milvus") + class MilvusVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate): def __init__( - self, config: Union[RemoteMilvusVectorIOConfig, InlineMilvusVectorIOConfig], inference_api: Api.inference + self, config: RemoteMilvusVectorIOConfig | InlineMilvusVectorIOConfig, inference_api: Api.inference ) -> None: self.config = config self.cache = {} @@ -124,7 +132,7 @@ class MilvusVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate): self.cache[vector_db.identifier] = index - async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> Optional[VectorDBWithIndex]: + async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex | None: if vector_db_id in self.cache: return self.cache[vector_db_id] @@ -148,8 +156,8 @@ class MilvusVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate): async def insert_chunks( self, vector_db_id: str, - chunks: List[Chunk], - ttl_seconds: Optional[int] = None, + chunks: list[Chunk], + ttl_seconds: int | None = None, ) -> None: index = await self._get_and_cache_vector_db_index(vector_db_id) if not index: @@ -161,7 +169,7 @@ class MilvusVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate): self, vector_db_id: str, query: InterleavedContent, - params: Optional[Dict[str, Any]] = None, + params: dict[str, Any] | None = None, ) -> QueryChunksResponse: index = await self._get_and_cache_vector_db_index(vector_db_id) if not index: @@ -172,7 +180,7 @@ class MilvusVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate): def generate_chunk_id(document_id: str, chunk_text: str) -> str: """Generate a unique chunk ID using a hash of document ID and chunk text.""" - hash_input = f"{document_id}:{chunk_text}".encode("utf-8") + hash_input = f"{document_id}:{chunk_text}".encode() return str(uuid.UUID(hashlib.md5(hash_input).hexdigest())) diff --git a/llama_stack/providers/remote/vector_io/pgvector/__init__.py b/llama_stack/providers/remote/vector_io/pgvector/__init__.py index 089d890b7..9f528db74 100644 --- a/llama_stack/providers/remote/vector_io/pgvector/__init__.py +++ b/llama_stack/providers/remote/vector_io/pgvector/__init__.py @@ -4,14 +4,12 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
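generate_chunk_id() above derives a deterministic UUID from the document ID and chunk text, so identical content always maps to the same ID. A standalone illustration with hypothetical inputs:

    import hashlib
    import uuid

    doc_id, text = "doc-1", "hello world"
    hash_input = f"{doc_id}:{text}".encode()
    chunk_id = str(uuid.UUID(hashlib.md5(hash_input).hexdigest()))
    # Re-running this with the same (doc_id, text) pair yields the same chunk_id,
    # which keeps chunk identifiers stable when a document is re-ingested.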
-from typing import Dict - from llama_stack.providers.datatypes import Api, ProviderSpec from .config import PGVectorVectorIOConfig -async def get_adapter_impl(config: PGVectorVectorIOConfig, deps: Dict[Api, ProviderSpec]): +async def get_adapter_impl(config: PGVectorVectorIOConfig, deps: dict[Api, ProviderSpec]): from .pgvector import PGVectorVectorIOAdapter impl = PGVectorVectorIOAdapter(config, deps[Api.inference]) diff --git a/llama_stack/providers/remote/vector_io/pgvector/config.py b/llama_stack/providers/remote/vector_io/pgvector/config.py index e9eb0f12d..04b92a2e4 100644 --- a/llama_stack/providers/remote/vector_io/pgvector/config.py +++ b/llama_stack/providers/remote/vector_io/pgvector/config.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict +from typing import Any from pydantic import BaseModel, Field @@ -28,5 +28,5 @@ class PGVectorVectorIOConfig(BaseModel): user: str = "${env.PGVECTOR_USER}", password: str = "${env.PGVECTOR_PASSWORD}", **kwargs: Any, - ) -> Dict[str, Any]: + ) -> dict[str, Any]: return {"host": host, "port": port, "db": db, "user": user, "password": password} diff --git a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py index 7c683e126..ea918c552 100644 --- a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py +++ b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py @@ -5,7 +5,7 @@ # the root directory of this source tree. import logging -from typing import Any, Dict, List, Optional, Tuple +from typing import Any import psycopg2 from numpy.typing import NDArray @@ -33,7 +33,7 @@ def check_extension_version(cur): return result[0] if result else None -def upsert_models(conn, keys_models: List[Tuple[str, BaseModel]]): +def upsert_models(conn, keys_models: list[tuple[str, BaseModel]]): with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: query = sql.SQL( """ @@ -74,7 +74,7 @@ class PGVectorIndex(EmbeddingIndex): """ ) - async def add_chunks(self, chunks: List[Chunk], embeddings: NDArray): + async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray): assert len(chunks) == len(embeddings), ( f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}" ) @@ -99,7 +99,7 @@ class PGVectorIndex(EmbeddingIndex): with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: execute_values(cur, query, values, template="(%s, %s, %s::vector)") - async def query(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse: + async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse: with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: cur.execute( f""" @@ -120,6 +120,14 @@ class PGVectorIndex(EmbeddingIndex): return QueryChunksResponse(chunks=chunks, scores=scores) + async def query_keyword( + self, + query_string: str, + k: int, + score_threshold: float, + ) -> QueryChunksResponse: + raise NotImplementedError("Keyword search is not supported in PGVector") + async def delete(self): with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: cur.execute(f"DROP TABLE IF EXISTS {self.table_name}") @@ -180,8 +188,8 @@ class PGVectorVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate): async def insert_chunks( self, vector_db_id: str, - chunks: List[Chunk], - ttl_seconds: Optional[int] = None, + chunks: list[Chunk], + 
ttl_seconds: int | None = None, ) -> None: index = await self._get_and_cache_vector_db_index(vector_db_id) await index.insert_chunks(chunks) @@ -190,7 +198,7 @@ class PGVectorVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate): self, vector_db_id: str, query: InterleavedContent, - params: Optional[Dict[str, Any]] = None, + params: dict[str, Any] | None = None, ) -> QueryChunksResponse: index = await self._get_and_cache_vector_db_index(vector_db_id) return await index.query_chunks(query, params) diff --git a/llama_stack/providers/remote/vector_io/qdrant/__init__.py b/llama_stack/providers/remote/vector_io/qdrant/__init__.py index f5bb7f84c..029de285f 100644 --- a/llama_stack/providers/remote/vector_io/qdrant/__init__.py +++ b/llama_stack/providers/remote/vector_io/qdrant/__init__.py @@ -4,14 +4,12 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Dict - from llama_stack.providers.datatypes import Api, ProviderSpec from .config import QdrantVectorIOConfig -async def get_adapter_impl(config: QdrantVectorIOConfig, deps: Dict[Api, ProviderSpec]): +async def get_adapter_impl(config: QdrantVectorIOConfig, deps: dict[Api, ProviderSpec]): from .qdrant import QdrantVectorIOAdapter impl = QdrantVectorIOAdapter(config, deps[Api.inference]) diff --git a/llama_stack/providers/remote/vector_io/qdrant/config.py b/llama_stack/providers/remote/vector_io/qdrant/config.py index 6d7eebe23..314d3f5f1 100644 --- a/llama_stack/providers/remote/vector_io/qdrant/config.py +++ b/llama_stack/providers/remote/vector_io/qdrant/config.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict, Optional +from typing import Any from pydantic import BaseModel @@ -13,19 +13,19 @@ from llama_stack.schema_utils import json_schema_type @json_schema_type class QdrantVectorIOConfig(BaseModel): - location: Optional[str] = None - url: Optional[str] = None - port: Optional[int] = 6333 + location: str | None = None + url: str | None = None + port: int | None = 6333 grpc_port: int = 6334 prefer_grpc: bool = False - https: Optional[bool] = None - api_key: Optional[str] = None - prefix: Optional[str] = None - timeout: Optional[int] = None - host: Optional[str] = None + https: bool | None = None + api_key: str | None = None + prefix: str | None = None + timeout: int | None = None + host: str | None = None @classmethod - def sample_run_config(cls, **kwargs: Any) -> Dict[str, Any]: + def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]: return { "api_key": "${env.QDRANT_API_KEY}", } diff --git a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py index 9e7788dc0..ff0690083 100644 --- a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py +++ b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py @@ -6,7 +6,7 @@ import logging import uuid -from typing import Any, Dict, List, Optional, Union +from typing import Any from numpy.typing import NDArray from qdrant_client import AsyncQdrantClient, models @@ -44,7 +44,7 @@ class QdrantIndex(EmbeddingIndex): self.client = client self.collection_name = collection_name - async def add_chunks(self, chunks: List[Chunk], embeddings: NDArray): + async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray): assert len(chunks) == len(embeddings), ( f"Chunk length {len(chunks)} does not match embedding length 
{len(embeddings)}" ) @@ -68,7 +68,7 @@ class QdrantIndex(EmbeddingIndex): await self.client.upsert(collection_name=self.collection_name, points=points) - async def query(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse: + async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse: results = ( await self.client.query_points( collection_name=self.collection_name, @@ -95,13 +95,21 @@ class QdrantIndex(EmbeddingIndex): return QueryChunksResponse(chunks=chunks, scores=scores) + async def query_keyword( + self, + query_string: str, + k: int, + score_threshold: float, + ) -> QueryChunksResponse: + raise NotImplementedError("Keyword search is not supported in Qdrant") + async def delete(self): await self.client.delete_collection(collection_name=self.collection_name) class QdrantVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate): def __init__( - self, config: Union[RemoteQdrantVectorIOConfig, InlineQdrantVectorIOConfig], inference_api: Api.inference + self, config: RemoteQdrantVectorIOConfig | InlineQdrantVectorIOConfig, inference_api: Api.inference ) -> None: self.config = config self.client: AsyncQdrantClient = None @@ -131,7 +139,7 @@ class QdrantVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate): await self.cache[vector_db_id].index.delete() del self.cache[vector_db_id] - async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> Optional[VectorDBWithIndex]: + async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex | None: if vector_db_id in self.cache: return self.cache[vector_db_id] @@ -150,8 +158,8 @@ class QdrantVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate): async def insert_chunks( self, vector_db_id: str, - chunks: List[Chunk], - ttl_seconds: Optional[int] = None, + chunks: list[Chunk], + ttl_seconds: int | None = None, ) -> None: index = await self._get_and_cache_vector_db_index(vector_db_id) if not index: @@ -163,7 +171,7 @@ class QdrantVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate): self, vector_db_id: str, query: InterleavedContent, - params: Optional[Dict[str, Any]] = None, + params: dict[str, Any] | None = None, ) -> QueryChunksResponse: index = await self._get_and_cache_vector_db_index(vector_db_id) if not index: diff --git a/llama_stack/providers/remote/vector_io/weaviate/__init__.py b/llama_stack/providers/remote/vector_io/weaviate/__init__.py index c93c628d8..22e116c22 100644 --- a/llama_stack/providers/remote/vector_io/weaviate/__init__.py +++ b/llama_stack/providers/remote/vector_io/weaviate/__init__.py @@ -4,14 +4,12 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from typing import Dict - from llama_stack.providers.datatypes import Api, ProviderSpec -from .config import WeaviateRequestProviderData, WeaviateVectorIOConfig # noqa: F401 +from .config import WeaviateVectorIOConfig -async def get_adapter_impl(config: WeaviateVectorIOConfig, deps: Dict[Api, ProviderSpec]): +async def get_adapter_impl(config: WeaviateVectorIOConfig, deps: dict[Api, ProviderSpec]): from .weaviate import WeaviateVectorIOAdapter impl = WeaviateVectorIOAdapter(config, deps[Api.inference]) diff --git a/llama_stack/providers/remote/vector_io/weaviate/config.py b/llama_stack/providers/remote/vector_io/weaviate/config.py index cc587f252..a8c6e3e2c 100644 --- a/llama_stack/providers/remote/vector_io/weaviate/config.py +++ b/llama_stack/providers/remote/vector_io/weaviate/config.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict +from typing import Any from pydantic import BaseModel @@ -16,5 +16,5 @@ class WeaviateRequestProviderData(BaseModel): class WeaviateVectorIOConfig(BaseModel): @classmethod - def sample_run_config(cls, **kwargs: Any) -> Dict[str, Any]: + def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]: return {} diff --git a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py index 52aa2f3a3..e6fe8ccd3 100644 --- a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py +++ b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py @@ -5,7 +5,7 @@ # the root directory of this source tree. import json import logging -from typing import Any, Dict, List, Optional +from typing import Any import weaviate import weaviate.classes as wvc @@ -33,7 +33,7 @@ class WeaviateIndex(EmbeddingIndex): self.client = client self.collection_name = collection_name - async def add_chunks(self, chunks: List[Chunk], embeddings: NDArray): + async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray): assert len(chunks) == len(embeddings), ( f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}" ) @@ -55,7 +55,7 @@ class WeaviateIndex(EmbeddingIndex): # TODO: make this async friendly collection.data.insert_many(data_objects) - async def query(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse: + async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse: collection = self.client.collections.get(self.collection_name) results = collection.query.near_vector( @@ -80,10 +80,18 @@ class WeaviateIndex(EmbeddingIndex): return QueryChunksResponse(chunks=chunks, scores=scores) - async def delete(self, chunk_ids: List[str]) -> None: + async def delete(self, chunk_ids: list[str]) -> None: collection = self.client.collections.get(self.collection_name) collection.data.delete_many(where=Filter.by_property("id").contains_any(chunk_ids)) + async def query_keyword( + self, + query_string: str, + k: int, + score_threshold: float, + ) -> QueryChunksResponse: + raise NotImplementedError("Keyword search is not supported in Weaviate") + class WeaviateVectorIOAdapter( VectorIO, @@ -144,7 +152,7 @@ class WeaviateVectorIOAdapter( self.inference_api, ) - async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> Optional[VectorDBWithIndex]: + async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex | None: if vector_db_id in self.cache: return self.cache[vector_db_id] @@ 
-167,8 +175,8 @@ class WeaviateVectorIOAdapter( async def insert_chunks( self, vector_db_id: str, - chunks: List[Chunk], - ttl_seconds: Optional[int] = None, + chunks: list[Chunk], + ttl_seconds: int | None = None, ) -> None: index = await self._get_and_cache_vector_db_index(vector_db_id) if not index: @@ -180,7 +188,7 @@ class WeaviateVectorIOAdapter( self, vector_db_id: str, query: InterleavedContent, - params: Optional[Dict[str, Any]] = None, + params: dict[str, Any] | None = None, ) -> QueryChunksResponse: index = await self._get_and_cache_vector_db_index(vector_db_id) if not index: diff --git a/llama_stack/providers/tests/ci_test_config.yaml b/llama_stack/providers/tests/ci_test_config.yaml deleted file mode 100644 index 3edcd38bf..000000000 --- a/llama_stack/providers/tests/ci_test_config.yaml +++ /dev/null @@ -1,55 +0,0 @@ -inference: - tests: - - inference/test_vision_inference.py::test_vision_chat_completion_streaming - - inference/test_vision_inference.py::test_vision_chat_completion_non_streaming - - inference/test_text_inference.py::test_structured_output - - inference/test_text_inference.py::test_chat_completion_streaming - - inference/test_text_inference.py::test_chat_completion_non_streaming - - inference/test_text_inference.py::test_chat_completion_with_tool_calling - - inference/test_text_inference.py::test_chat_completion_with_tool_calling_streaming - - scenarios: - - provider_fixtures: - inference: ollama - - fixture_combo_id: fireworks - - provider_fixtures: - inference: together - # - inference: tgi - # - inference: vllm_remote - - inference_models: - - meta-llama/Llama-3.1-8B-Instruct - - meta-llama/Llama-3.2-11B-Vision-Instruct - - -agents: - tests: - - agents/test_agents.py::test_agent_turns_with_safety - - agents/test_agents.py::test_rag_agent - - scenarios: - - fixture_combo_id: ollama - - fixture_combo_id: together - - fixture_combo_id: fireworks - - inference_models: - - meta-llama/Llama-3.2-1B-Instruct - - safety_shield: meta-llama/Llama-Guard-3-1B - - -memory: - tests: - - memory/test_memory.py::test_query_documents - - scenarios: - - fixture_combo_id: ollama - - provider_fixtures: - inference: sentence_transformers - memory: faiss - - fixture_combo_id: chroma - - inference_models: - - meta-llama/Llama-3.2-1B-Instruct - - embedding_model: all-MiniLM-L6-v2 diff --git a/llama_stack/providers/tests/conftest.py b/llama_stack/providers/tests/conftest.py deleted file mode 100644 index d3e715b7e..000000000 --- a/llama_stack/providers/tests/conftest.py +++ /dev/null @@ -1,296 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -import os -from collections import defaultdict -from pathlib import Path -from typing import Any, Dict, List, Optional - -import pytest -import yaml -from dotenv import load_dotenv -from pydantic import BaseModel, Field -from termcolor import colored - -from llama_stack.distribution.datatypes import Provider -from llama_stack.providers.datatypes import RemoteProviderConfig - -from .env import get_env_or_fail -from .report import Report - - -class ProviderFixture(BaseModel): - providers: List[Provider] - provider_data: Optional[Dict[str, Any]] = None - - -class TestScenario(BaseModel): - # provider fixtures can be either a mark or a dictionary of api -> providers - provider_fixtures: Dict[str, str] = Field(default_factory=dict) - fixture_combo_id: Optional[str] = None - - -class APITestConfig(BaseModel): - scenarios: List[TestScenario] = Field(default_factory=list) - inference_models: List[str] = Field(default_factory=list) - - # test name format should be :: - tests: List[str] = Field(default_factory=list) - - -class MemoryApiTestConfig(APITestConfig): - embedding_model: Optional[str] = Field(default_factory=None) - - -class AgentsApiTestConfig(APITestConfig): - safety_shield: Optional[str] = Field(default_factory=None) - - -class TestConfig(BaseModel): - inference: Optional[APITestConfig] = None - agents: Optional[AgentsApiTestConfig] = None - memory: Optional[MemoryApiTestConfig] = None - - -def get_test_config_from_config_file(metafunc_config): - config_file = metafunc_config.getoption("--config") - if config_file is None: - return None - - config_file_path = Path(__file__).parent / config_file - if not config_file_path.exists(): - raise ValueError( - f"Test config {config_file} was specified but not found. Please make sure it exists in the llama_stack/providers/tests directory." 
- ) - with open(config_file_path, "r") as config_file: - config = yaml.safe_load(config_file) - return TestConfig(**config) - - -def get_test_config_for_api(metafunc_config, api): - test_config = get_test_config_from_config_file(metafunc_config) - if test_config is None: - return None - return getattr(test_config, api) - - -def get_provider_fixture_overrides_from_test_config(metafunc_config, api, default_provider_fixture_combinations): - api_config = get_test_config_for_api(metafunc_config, api) - if api_config is None: - return None - - fixture_combo_ids = set() - custom_provider_fixture_combos = [] - for scenario in api_config.scenarios: - if scenario.fixture_combo_id: - fixture_combo_ids.add(scenario.fixture_combo_id) - else: - custom_provider_fixture_combos.append( - pytest.param( - scenario.provider_fixtures, - id=scenario.provider_fixtures.get("inference") or "", - ) - ) - - if len(fixture_combo_ids) > 0: - for default_fixture in default_provider_fixture_combinations: - if default_fixture.id in fixture_combo_ids: - custom_provider_fixture_combos.append(default_fixture) - return custom_provider_fixture_combos - - -def remote_stack_fixture() -> ProviderFixture: - if url := os.getenv("REMOTE_STACK_URL", None): - config = RemoteProviderConfig.from_url(url) - else: - config = RemoteProviderConfig( - host=get_env_or_fail("REMOTE_STACK_HOST"), - port=int(get_env_or_fail("REMOTE_STACK_PORT")), - ) - return ProviderFixture( - providers=[ - Provider( - provider_id="test::remote", - provider_type="test::remote", - config=config.model_dump(), - ) - ], - ) - - -def pytest_configure(config): - config.option.tbstyle = "short" - config.option.disable_warnings = True - - """Load environment variables at start of test run""" - # Load from .env file if it exists - env_file = Path(__file__).parent / ".env" - if env_file.exists(): - load_dotenv(env_file) - - # Load any environment variables passed via --env - env_vars = config.getoption("--env") or [] - for env_var in env_vars: - key, value = env_var.split("=", 1) - os.environ[key] = value - - if config.getoption("--output") is not None: - config.pluginmanager.register(Report(config.getoption("--output"))) - - -def pytest_addoption(parser): - parser.addoption( - "--providers", - default="", - help=( - "Provider configuration in format: api1=provider1,api2=provider2. " - "Example: --providers inference=ollama,safety=meta-reference" - ), - ) - parser.addoption( - "--config", - action="store", - help="Set test config file (supported format: YAML), e.g. --config=test_config.yml", - ) - parser.addoption( - "--output", - action="store", - help="Set output file for test report, e.g. --output=pytest_report.md", - ) - """Add custom command line options""" - parser.addoption("--env", action="append", help="Set environment variables, e.g. 
--env KEY=value") - parser.addoption( - "--inference-model", - action="store", - default="meta-llama/Llama-3.2-3B-Instruct", - help="Specify the inference model to use for testing", - ) - parser.addoption( - "--safety-shield", - action="store", - default="meta-llama/Llama-Guard-3-1B", - help="Specify the safety shield to use for testing", - ) - parser.addoption( - "--embedding-model", - action="store", - default=None, - help="Specify the embedding model to use for testing", - ) - parser.addoption( - "--judge-model", - action="store", - default="meta-llama/Llama-3.1-8B-Instruct", - help="Specify the judge model to use for testing", - ) - - -def make_provider_id(providers: Dict[str, str]) -> str: - return ":".join(f"{api}={provider}" for api, provider in sorted(providers.items())) - - -def get_provider_marks(providers: Dict[str, str]) -> List[Any]: - marks = [] - for provider in providers.values(): - marks.append(getattr(pytest.mark, provider)) - return marks - - -def get_provider_fixture_overrides(config, available_fixtures: Dict[str, List[str]]) -> Optional[List[pytest.param]]: - provider_str = config.getoption("--providers") - if not provider_str: - return None - - fixture_dict = parse_fixture_string(provider_str, available_fixtures) - return [ - pytest.param( - fixture_dict, - id=make_provider_id(fixture_dict), - marks=get_provider_marks(fixture_dict), - ) - ] - - -def parse_fixture_string(provider_str: str, available_fixtures: Dict[str, List[str]]) -> Dict[str, str]: - """Parse provider string of format 'api1=provider1,api2=provider2'""" - if not provider_str: - return {} - - fixtures = {} - pairs = provider_str.split(",") - for pair in pairs: - if "=" not in pair: - raise ValueError(f"Invalid provider specification: {pair}. Expected format: api=provider") - api, fixture = pair.split("=") - if api not in available_fixtures: - raise ValueError(f"Unknown API: {api}. Available APIs: {list(available_fixtures.keys())}") - if fixture not in available_fixtures[api]: - raise ValueError( - f"Unknown provider '{fixture}' for API '{api}'. Available providers: {list(available_fixtures[api])}" - ) - fixtures[api] = fixture - - # Check that all provided APIs are supported - for api in available_fixtures.keys(): - if api not in fixtures: - raise ValueError( - f"Missing provider fixture for API '{api}'. 
Available providers: {list(available_fixtures[api])}" - ) - return fixtures - - -def pytest_itemcollected(item): - # Get all markers as a list - filtered = ("asyncio", "parametrize") - marks = [mark.name for mark in item.iter_markers() if mark.name not in filtered] - if marks: - marks = colored(",".join(marks), "yellow") - item.name = f"{item.name}[{marks}]" - - -def pytest_collection_modifyitems(session, config, items): - test_config = get_test_config_from_config_file(config) - if test_config is None: - return - - required_tests = defaultdict(set) - for api_test_config in [ - test_config.inference, - test_config.memory, - test_config.agents, - ]: - if api_test_config is None: - continue - for test in api_test_config.tests: - arr = test.split("::") - if len(arr) != 2: - raise ValueError(f"Invalid format for test name {test}") - test_path, func_name = arr - required_tests[Path(__file__).parent / test_path].add(func_name) - - new_items, deselected_items = [], [] - for item in items: - func_name = getattr(item, "originalname", item.name) - if func_name in required_tests[item.fspath]: - new_items.append(item) - continue - deselected_items.append(item) - - items[:] = new_items - config.hook.pytest_deselected(items=deselected_items) - - -pytest_plugins = [ - "llama_stack.providers.tests.inference.fixtures", - "llama_stack.providers.tests.safety.fixtures", - "llama_stack.providers.tests.vector_io.fixtures", - "llama_stack.providers.tests.agents.fixtures", - "llama_stack.providers.tests.datasetio.fixtures", - "llama_stack.providers.tests.scoring.fixtures", - "llama_stack.providers.tests.eval.fixtures", - "llama_stack.providers.tests.post_training.fixtures", - "llama_stack.providers.tests.tools.fixtures", -] diff --git a/llama_stack/providers/tests/report.py b/llama_stack/providers/tests/report.py deleted file mode 100644 index bc29534be..000000000 --- a/llama_stack/providers/tests/report.py +++ /dev/null @@ -1,176 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- - -from collections import defaultdict -from pathlib import Path - -import pytest -from pytest import ExitCode -from pytest_html.basereport import _process_outcome - -from llama_stack.models.llama.sku_list import all_registered_models -from llama_stack.models.llama.sku_types import CoreModelId - -INFERENCE_APIS = ["chat_completion"] -FUNCTIONALITIES = ["streaming", "structured_output", "tool_calling"] -SUPPORTED_MODELS = { - "ollama": { - CoreModelId.llama3_1_8b_instruct.value, - CoreModelId.llama3_1_8b_instruct.value, - CoreModelId.llama3_1_70b_instruct.value, - CoreModelId.llama3_1_70b_instruct.value, - CoreModelId.llama3_1_405b_instruct.value, - CoreModelId.llama3_1_405b_instruct.value, - CoreModelId.llama3_2_1b_instruct.value, - CoreModelId.llama3_2_1b_instruct.value, - CoreModelId.llama3_2_3b_instruct.value, - CoreModelId.llama3_2_3b_instruct.value, - CoreModelId.llama3_2_11b_vision_instruct.value, - CoreModelId.llama3_2_11b_vision_instruct.value, - CoreModelId.llama3_2_90b_vision_instruct.value, - CoreModelId.llama3_2_90b_vision_instruct.value, - CoreModelId.llama3_3_70b_instruct.value, - CoreModelId.llama_guard_3_8b.value, - CoreModelId.llama_guard_3_1b.value, - }, - "fireworks": { - CoreModelId.llama3_1_8b_instruct.value, - CoreModelId.llama3_1_70b_instruct.value, - CoreModelId.llama3_1_405b_instruct.value, - CoreModelId.llama3_2_1b_instruct.value, - CoreModelId.llama3_2_3b_instruct.value, - CoreModelId.llama3_2_11b_vision_instruct.value, - CoreModelId.llama3_2_90b_vision_instruct.value, - CoreModelId.llama3_3_70b_instruct.value, - CoreModelId.llama_guard_3_8b.value, - CoreModelId.llama_guard_3_11b_vision.value, - }, - "together": { - CoreModelId.llama3_1_8b_instruct.value, - CoreModelId.llama3_1_70b_instruct.value, - CoreModelId.llama3_1_405b_instruct.value, - CoreModelId.llama3_2_3b_instruct.value, - CoreModelId.llama3_2_11b_vision_instruct.value, - CoreModelId.llama3_2_90b_vision_instruct.value, - CoreModelId.llama3_3_70b_instruct.value, - CoreModelId.llama_guard_3_8b.value, - CoreModelId.llama_guard_3_11b_vision.value, - }, -} - - -class Report: - def __init__(self, output_path): - valid_file_format = ( - output_path.split(".")[1] in ["md", "markdown"] if len(output_path.split(".")) == 2 else False - ) - if not valid_file_format: - raise ValueError(f"Invalid output file {output_path}. 
Markdown file is required") - self.output_path = output_path - self.test_data = defaultdict(dict) - self.inference_tests = defaultdict(dict) - - @pytest.hookimpl - def pytest_runtest_logreport(self, report): - # This hook is called in several phases, including setup, call and teardown - # The test is considered failed / error if any of the outcomes is not "Passed" - outcome = _process_outcome(report) - data = { - "outcome": report.outcome, - "longrepr": report.longrepr, - "name": report.nodeid, - } - if report.nodeid not in self.test_data: - self.test_data[report.nodeid] = data - elif self.test_data[report.nodeid] != outcome and outcome != "Passed": - self.test_data[report.nodeid] = data - - @pytest.hookimpl - def pytest_sessionfinish(self, session, exitstatus): - if exitstatus <= ExitCode.INTERRUPTED: - return - report = [] - report.append("# Llama Stack Integration Test Results Report") - report.append("\n## Summary") - report.append("\n## Supported Models: ") - - header = "| Model Descriptor |" - dividor = "|:---|" - for k in SUPPORTED_MODELS.keys(): - header += f"{k} |" - dividor += ":---:|" - - report.append(header) - report.append(dividor) - - rows = [] - for model in all_registered_models(): - if "Instruct" not in model.core_model_id.value and "Guard" not in model.core_model_id.value: - continue - row = f"| {model.core_model_id.value} |" - for k in SUPPORTED_MODELS.keys(): - if model.core_model_id.value in SUPPORTED_MODELS[k]: - row += " ✅ |" - else: - row += " ❌ |" - rows.append(row) - report.extend(rows) - - report.append("\n### Tests:") - - for provider in SUPPORTED_MODELS.keys(): - if provider not in self.inference_tests: - continue - report.append(f"\n #### {provider}") - test_table = [ - "| Area | Model | API | Functionality Test | Status |", - "|:-----|:-----|:-----|:-----|:-----|", - ] - for api in INFERENCE_APIS: - tests = self.inference_tests[provider][api] - for test_nodeid in tests: - row = "|{area} | {model} | {api} | {test} | {result} ".format( - area="Text" if "text" in test_nodeid else "Vision", - model=("Llama-3.1-8B-Instruct" if "text" in test_nodeid else "Llama3.2-11B-Vision-Instruct"), - api=f"/{api}", - test=self.get_simple_function_name(test_nodeid), - result=("✅" if self.test_data[test_nodeid]["outcome"] == "passed" else "❌"), - ) - test_table += [row] - report.extend(test_table) - report.append("\n") - - output_file = Path(self.output_path) - output_file.write_text("\n".join(report)) - print(f"\n Report generated: {output_file.absolute()}") - - @pytest.hookimpl(trylast=True) - def pytest_collection_modifyitems(self, session, config, items): - for item in items: - inference = item.callspec.params.get("inference_stack") - if "inference" in item.nodeid: - func_name = getattr(item, "originalname", item.name) - for api in INFERENCE_APIS: - if api in func_name: - api_tests = self.inference_tests[inference].get(api, set()) - api_tests.add(item.nodeid) - self.inference_tests[inference][api] = api_tests - - def get_simple_function_name(self, nodeid): - """Extract function name from nodeid. 
- - Examples: - - 'tests/test_math.py::test_addition' -> 'test_addition' - - 'tests/test_math.py::TestClass::test_method' -> test_method' - """ - parts = nodeid.split("::") - func_name = nodeid # Fallback to full nodeid if pattern doesn't match - if len(parts) == 2: # Simple function - func_name = parts[1] - elif len(parts) == 3: # Class method - func_name = parts[2] - return func_name.split("[")[0] diff --git a/llama_stack/providers/utils/bedrock/config.py b/llama_stack/providers/utils/bedrock/config.py index 95019666b..b25617d76 100644 --- a/llama_stack/providers/utils/bedrock/config.py +++ b/llama_stack/providers/utils/bedrock/config.py @@ -3,54 +3,53 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Optional from pydantic import BaseModel, Field class BedrockBaseConfig(BaseModel): - aws_access_key_id: Optional[str] = Field( + aws_access_key_id: str | None = Field( default=None, description="The AWS access key to use. Default use environment variable: AWS_ACCESS_KEY_ID", ) - aws_secret_access_key: Optional[str] = Field( + aws_secret_access_key: str | None = Field( default=None, description="The AWS secret access key to use. Default use environment variable: AWS_SECRET_ACCESS_KEY", ) - aws_session_token: Optional[str] = Field( + aws_session_token: str | None = Field( default=None, description="The AWS session token to use. Default use environment variable: AWS_SESSION_TOKEN", ) - region_name: Optional[str] = Field( + region_name: str | None = Field( default=None, description="The default AWS Region to use, for example, us-west-1 or us-west-2." "Default use environment variable: AWS_DEFAULT_REGION", ) - profile_name: Optional[str] = Field( + profile_name: str | None = Field( default=None, description="The profile name that contains credentials to use.Default use environment variable: AWS_PROFILE", ) - total_max_attempts: Optional[int] = Field( + total_max_attempts: int | None = Field( default=None, description="An integer representing the maximum number of attempts that will be made for a single request, " "including the initial attempt. Default use environment variable: AWS_MAX_ATTEMPTS", ) - retry_mode: Optional[str] = Field( + retry_mode: str | None = Field( default=None, description="A string representing the type of retries Boto3 will perform." "Default use environment variable: AWS_RETRY_MODE", ) - connect_timeout: Optional[float] = Field( + connect_timeout: float | None = Field( default=60, description="The time in seconds till a timeout exception is thrown when attempting to make a connection. " "The default is 60 seconds.", ) - read_timeout: Optional[float] = Field( + read_timeout: float | None = Field( default=60, description="The time in seconds till a timeout exception is thrown when attempting to read from a connection." "The default is 60 seconds.", ) - session_ttl: Optional[int] = Field( + session_ttl: int | None = Field( default=3600, description="The time in seconds till a session expires. The default is 3600 seconds (1 hour).", ) diff --git a/llama_stack/providers/utils/common/data_schema_validator.py b/llama_stack/providers/utils/common/data_schema_validator.py index eb9d9dd60..28a243863 100644 --- a/llama_stack/providers/utils/common/data_schema_validator.py +++ b/llama_stack/providers/utils/common/data_schema_validator.py @@ -5,7 +5,7 @@ # the root directory of this source tree. 
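Since every `BedrockBaseConfig` field above is optional and its `Field` description points at an environment-variable fallback, a bare instantiation is valid; a small sketch (the override values are illustrative, not taken from the diff):

```python
from llama_stack.providers.utils.bedrock.config import BedrockBaseConfig

# Every field defaults to None (or the documented 60s/3600s defaults); the field
# descriptions point to the AWS_* environment variables consulted downstream.
config = BedrockBaseConfig()

# Explicit overrides still work, now annotated as `str | None`, `int | None`, etc.
config = BedrockBaseConfig(region_name="us-west-2", total_max_attempts=3)
```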
from enum import Enum -from typing import Any, Dict, List +from typing import Any from llama_stack.apis.common.type_system import ( ChatCompletionInputType, @@ -85,16 +85,16 @@ def get_valid_schemas(api_str: str): def validate_dataset_schema( - dataset_schema: Dict[str, Any], - expected_schemas: List[Dict[str, Any]], + dataset_schema: dict[str, Any], + expected_schemas: list[dict[str, Any]], ): if dataset_schema not in expected_schemas: raise ValueError(f"Dataset {dataset_schema} does not have a correct input schema in {expected_schemas}") def validate_row_schema( - input_row: Dict[str, Any], - expected_schemas: List[Dict[str, Any]], + input_row: dict[str, Any], + expected_schemas: list[dict[str, Any]], ): for schema in expected_schemas: if all(key in input_row for key in schema): diff --git a/llama_stack/providers/utils/inference/__init__.py b/llama_stack/providers/utils/inference/__init__.py index e36be9404..66269d173 100644 --- a/llama_stack/providers/utils/inference/__init__.py +++ b/llama_stack/providers/utils/inference/__init__.py @@ -4,8 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import List - from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.models.llama.sku_types import * # noqa: F403 @@ -22,7 +20,7 @@ def is_supported_safety_model(model: Model) -> bool: ] -def supported_inference_models() -> List[Model]: +def supported_inference_models() -> list[Model]: return [ m for m in all_registered_models() diff --git a/llama_stack/providers/utils/inference/embedding_mixin.py b/llama_stack/providers/utils/inference/embedding_mixin.py index 8b14c7502..97cf87360 100644 --- a/llama_stack/providers/utils/inference/embedding_mixin.py +++ b/llama_stack/providers/utils/inference/embedding_mixin.py @@ -4,8 +4,10 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
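Before the embedding changes that follow, a quick sketch of the validator above: `validate_dataset_schema` raises unless the dataset schema matches one of the expected schemas exactly. The example assumes `StringType` is available from `llama_stack.apis.common.type_system`, next to the types this module already imports; the column names are illustrative.

```python
from llama_stack.apis.common.type_system import StringType  # assumed available
from llama_stack.providers.utils.common.data_schema_validator import (
    validate_dataset_schema,
)

expected = [{"input_query": StringType(), "expected_answer": StringType()}]

# Exact match with one of the expected schemas: no error.
validate_dataset_schema(
    {"input_query": StringType(), "expected_answer": StringType()}, expected
)

# A schema missing a column is rejected with a ValueError.
try:
    validate_dataset_schema({"input_query": StringType()}, expected)
except ValueError as err:
    print(err)
```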
+import base64 import logging -from typing import TYPE_CHECKING, List, Optional +import struct +from typing import TYPE_CHECKING if TYPE_CHECKING: from sentence_transformers import SentenceTransformer @@ -15,6 +17,9 @@ from llama_stack.apis.inference import ( EmbeddingTaskType, InterleavedContentItem, ModelStore, + OpenAIEmbeddingData, + OpenAIEmbeddingsResponse, + OpenAIEmbeddingUsage, TextTruncation, ) from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str @@ -31,10 +36,10 @@ class SentenceTransformerEmbeddingMixin: async def embeddings( self, model_id: str, - contents: List[str] | List[InterleavedContentItem], - text_truncation: Optional[TextTruncation] = TextTruncation.none, - output_dimension: Optional[int] = None, - task_type: Optional[EmbeddingTaskType] = None, + contents: list[str] | list[InterleavedContentItem], + text_truncation: TextTruncation | None = TextTruncation.none, + output_dimension: int | None = None, + task_type: EmbeddingTaskType | None = None, ) -> EmbeddingsResponse: model = await self.model_store.get_model(model_id) embedding_model = self._load_sentence_transformer_model(model.provider_resource_id) @@ -43,6 +48,50 @@ class SentenceTransformerEmbeddingMixin: ) return EmbeddingsResponse(embeddings=embeddings) + async def openai_embeddings( + self, + model: str, + input: str | list[str], + encoding_format: str | None = "float", + dimensions: int | None = None, + user: str | None = None, + ) -> OpenAIEmbeddingsResponse: + # Convert input to list format if it's a single string + input_list = [input] if isinstance(input, str) else input + if not input_list: + raise ValueError("Empty list not supported") + + # Get the model and generate embeddings + model_obj = await self.model_store.get_model(model) + embedding_model = self._load_sentence_transformer_model(model_obj.provider_resource_id) + embeddings = embedding_model.encode(input_list, show_progress_bar=False) + + # Convert embeddings to the requested format + data = [] + for i, embedding in enumerate(embeddings): + if encoding_format == "base64": + # Convert float array to base64 string + float_bytes = struct.pack(f"{len(embedding)}f", *embedding) + embedding_value = base64.b64encode(float_bytes).decode("ascii") + else: + # Default to float format + embedding_value = embedding.tolist() + + data.append( + OpenAIEmbeddingData( + embedding=embedding_value, + index=i, + ) + ) + + # Not returning actual token usage + usage = OpenAIEmbeddingUsage(prompt_tokens=-1, total_tokens=-1) + return OpenAIEmbeddingsResponse( + data=data, + model=model_obj.provider_resource_id, + usage=usage, + ) + def _load_sentence_transformer_model(self, model: str) -> "SentenceTransformer": global EMBEDDING_MODELS diff --git a/llama_stack/providers/utils/inference/inference_store.py b/llama_stack/providers/utils/inference/inference_store.py new file mode 100644 index 000000000..7b6bc2e3d --- /dev/null +++ b/llama_stack/providers/utils/inference/inference_store.py @@ -0,0 +1,123 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
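The base64 branch of `openai_embeddings` above packs each embedding with `struct.pack(f"{len(embedding)}f", ...)` (native byte order) and base64-encodes the bytes. A client that asked for `encoding_format="base64"` can invert that as sketched below; this helper is illustrative, not part of the diff:

```python
import base64
import struct


def decode_base64_embedding(value: str) -> list[float]:
    # Mirror of: base64.b64encode(struct.pack(f"{len(embedding)}f", *embedding))
    raw = base64.b64decode(value)
    count = len(raw) // struct.calcsize("f")
    return list(struct.unpack(f"{count}f", raw))


# Round-trip check against the encoding used in openai_embeddings above.
floats = [0.25, -1.5, 3.0]
encoded = base64.b64encode(struct.pack(f"{len(floats)}f", *floats)).decode("ascii")
assert decode_base64_embedding(encoded) == floats
```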
+from llama_stack.apis.inference import ( + ListOpenAIChatCompletionResponse, + OpenAIChatCompletion, + OpenAICompletionWithInputMessages, + OpenAIMessageParam, + Order, +) +from llama_stack.distribution.utils.config_dirs import RUNTIME_BASE_DIR + +from ..sqlstore.api import ColumnDefinition, ColumnType +from ..sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig, sqlstore_impl + + +class InferenceStore: + def __init__(self, sql_store_config: SqlStoreConfig): + if not sql_store_config: + sql_store_config = SqliteSqlStoreConfig( + db_path=(RUNTIME_BASE_DIR / "sqlstore.db").as_posix(), + ) + self.sql_store_config = sql_store_config + self.sql_store = None + + async def initialize(self): + """Create the necessary tables if they don't exist.""" + self.sql_store = sqlstore_impl(self.sql_store_config) + await self.sql_store.create_table( + "chat_completions", + { + "id": ColumnDefinition(type=ColumnType.STRING, primary_key=True), + "created": ColumnType.INTEGER, + "model": ColumnType.STRING, + "choices": ColumnType.JSON, + "input_messages": ColumnType.JSON, + }, + ) + + async def store_chat_completion( + self, chat_completion: OpenAIChatCompletion, input_messages: list[OpenAIMessageParam] + ) -> None: + if not self.sql_store: + raise ValueError("Inference store is not initialized") + + data = chat_completion.model_dump() + + await self.sql_store.insert( + "chat_completions", + { + "id": data["id"], + "created": data["created"], + "model": data["model"], + "choices": data["choices"], + "input_messages": [message.model_dump() for message in input_messages], + }, + ) + + async def list_chat_completions( + self, + after: str | None = None, + limit: int | None = 50, + model: str | None = None, + order: Order | None = Order.desc, + ) -> ListOpenAIChatCompletionResponse: + """ + List chat completions from the database. + + :param after: The ID of the last chat completion to return. + :param limit: The maximum number of chat completions to return. + :param model: The model to filter by. + :param order: The order to sort the chat completions by. 
+ """ + if not self.sql_store: + raise ValueError("Inference store is not initialized") + + # TODO: support after + if after: + raise NotImplementedError("After is not supported for SQLite") + if not order: + order = Order.desc + + rows = await self.sql_store.fetch_all( + "chat_completions", + where={"model": model} if model else None, + order_by=[("created", order.value)], + limit=limit, + ) + + data = [ + OpenAICompletionWithInputMessages( + id=row["id"], + created=row["created"], + model=row["model"], + choices=row["choices"], + input_messages=row["input_messages"], + ) + for row in rows + ] + return ListOpenAIChatCompletionResponse( + data=data, + # TODO: implement has_more + has_more=False, + first_id=data[0].id if data else "", + last_id=data[-1].id if data else "", + ) + + async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages: + if not self.sql_store: + raise ValueError("Inference store is not initialized") + + row = await self.sql_store.fetch_one("chat_completions", where={"id": completion_id}) + if not row: + raise ValueError(f"Chat completion with id {completion_id} not found") from None + return OpenAICompletionWithInputMessages( + id=row["id"], + created=row["created"], + model=row["model"], + choices=row["choices"], + input_messages=row["input_messages"], + ) diff --git a/llama_stack/providers/utils/inference/litellm_openai_mixin.py b/llama_stack/providers/utils/inference/litellm_openai_mixin.py index cd0f4ec67..dab10bc55 100644 --- a/llama_stack/providers/utils/inference/litellm_openai_mixin.py +++ b/llama_stack/providers/utils/inference/litellm_openai_mixin.py @@ -4,7 +4,10 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, AsyncGenerator, AsyncIterator, Dict, List, Optional, Union +import base64 +import struct +from collections.abc import AsyncGenerator, AsyncIterator +from typing import Any import litellm @@ -18,7 +21,7 @@ from llama_stack.apis.inference import ( ChatCompletionResponseStreamChunk, EmbeddingsResponse, EmbeddingTaskType, - Inference, + InferenceProvider, JsonSchemaResponseFormat, LogProbConfig, Message, @@ -30,7 +33,16 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.apis.inference.inference import OpenAIChatCompletion, OpenAICompletion, OpenAIMessageParam +from llama_stack.apis.inference.inference import ( + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, + OpenAIEmbeddingData, + OpenAIEmbeddingsResponse, + OpenAIEmbeddingUsage, + OpenAIMessageParam, + OpenAIResponseFormatParam, +) from llama_stack.apis.models.models import Model from llama_stack.distribution.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger @@ -52,13 +64,16 @@ logger = get_logger(name=__name__, category="inference") class LiteLLMOpenAIMixin( ModelRegistryHelper, - Inference, + InferenceProvider, NeedsRequestProviderData, ): + # TODO: avoid exposing the litellm specific model names to the user. + # potential change: add a prefix param that gets added to the model name + # when calling litellm. 
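Backing up to the new `InferenceStore` completed above before continuing with the LiteLLM mixin: a minimal usage sketch. The database path, IDs, and message content are illustrative, and `OpenAIUserMessageParam` is assumed to be exported from `llama_stack.apis.inference` alongside the assistant variant used elsewhere in this diff.

```python
import asyncio

from llama_stack.apis.inference import (
    OpenAIAssistantMessageParam,
    OpenAIChatCompletion,
    OpenAIChoice,
    OpenAIUserMessageParam,  # assumed export; swap in your build's user-message param
)
from llama_stack.providers.utils.inference.inference_store import InferenceStore
from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig


async def main() -> None:
    store = InferenceStore(SqliteSqlStoreConfig(db_path="/tmp/inference.db"))
    await store.initialize()  # creates the chat_completions table if needed

    completion = OpenAIChatCompletion(
        id="chatcmpl-123",
        created=1700000000,
        model="llama3.2:3b",
        object="chat.completion",
        choices=[
            OpenAIChoice(
                index=0,
                finish_reason="stop",
                message=OpenAIAssistantMessageParam(role="assistant", content="Hi!"),
            )
        ],
    )
    await store.store_chat_completion(
        completion, [OpenAIUserMessageParam(role="user", content="Hello")]
    )

    page = await store.list_chat_completions(limit=10)
    print([c.id for c in page.data])


asyncio.run(main())
```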
def __init__( self, model_entries, - api_key_from_config: Optional[str], + api_key_from_config: str | None, provider_data_api_key_field: str, openai_compat_api_base: str | None = None, ): @@ -84,30 +99,35 @@ class LiteLLMOpenAIMixin( raise ValueError(f"Unsupported model: {model.provider_resource_id}") return model + def get_litellm_model_name(self, model_id: str) -> str: + # users may be using openai/ prefix in their model names. the openai/models.py did this by default. + # model_id.startswith("openai/") is for backwards compatibility. + return "openai/" + model_id if self.is_openai_compat and not model_id.startswith("openai/") else model_id + async def completion( self, model_id: str, content: InterleavedContent, - sampling_params: Optional[SamplingParams] = None, - response_format: Optional[ResponseFormat] = None, - stream: Optional[bool] = False, - logprobs: Optional[LogProbConfig] = None, + sampling_params: SamplingParams | None = None, + response_format: ResponseFormat | None = None, + stream: bool | None = False, + logprobs: LogProbConfig | None = None, ) -> AsyncGenerator: raise NotImplementedError("LiteLLM does not support completion requests") async def chat_completion( self, model_id: str, - messages: List[Message], - sampling_params: Optional[SamplingParams] = None, - tools: Optional[List[ToolDefinition]] = None, - tool_choice: Optional[ToolChoice] = ToolChoice.auto, - tool_prompt_format: Optional[ToolPromptFormat] = None, - response_format: Optional[ResponseFormat] = None, - stream: Optional[bool] = False, - logprobs: Optional[LogProbConfig] = None, - tool_config: Optional[ToolConfig] = None, - ) -> Union[ChatCompletionResponse, AsyncIterator[ChatCompletionResponseStreamChunk]]: + messages: list[Message], + sampling_params: SamplingParams | None = None, + tools: list[ToolDefinition] | None = None, + tool_choice: ToolChoice | None = ToolChoice.auto, + tool_prompt_format: ToolPromptFormat | None = None, + response_format: ResponseFormat | None = None, + stream: bool | None = False, + logprobs: LogProbConfig | None = None, + tool_config: ToolConfig | None = None, + ) -> ChatCompletionResponse | AsyncIterator[ChatCompletionResponseStreamChunk]: if sampling_params is None: sampling_params = SamplingParams() @@ -124,8 +144,7 @@ class LiteLLMOpenAIMixin( ) params = await self._get_params(request) - if self.is_openai_compat: - params["model"] = "openai/" + params["model"] + params["model"] = self.get_litellm_model_name(params["model"]) logger.debug(f"params to litellm (openai compat): {params}") # unfortunately, we need to use synchronous litellm.completion here because litellm @@ -214,65 +233,113 @@ class LiteLLMOpenAIMixin( else request.tool_config.tool_choice ) - provider_data = self.get_request_provider_data() - key_field = self.provider_data_api_key_field - if provider_data and getattr(provider_data, key_field, None): - api_key = getattr(provider_data, key_field) - else: - api_key = self.api_key_from_config - return { "model": request.model, - "api_key": api_key, + "api_key": self.get_api_key(), "api_base": self.api_base, **input_dict, "stream": request.stream, **get_sampling_options(request.sampling_params), } + def get_api_key(self) -> str: + provider_data = self.get_request_provider_data() + key_field = self.provider_data_api_key_field + if provider_data and getattr(provider_data, key_field, None): + api_key = getattr(provider_data, key_field) + else: + api_key = self.api_key_from_config + return api_key + async def embeddings( self, model_id: str, - contents: List[str] | 
List[InterleavedContentItem], - text_truncation: Optional[TextTruncation] = TextTruncation.none, - output_dimension: Optional[int] = None, - task_type: Optional[EmbeddingTaskType] = None, + contents: list[str] | list[InterleavedContentItem], + text_truncation: TextTruncation | None = TextTruncation.none, + output_dimension: int | None = None, + task_type: EmbeddingTaskType | None = None, ) -> EmbeddingsResponse: model = await self.model_store.get_model(model_id) response = litellm.embedding( - model=model.provider_resource_id, + model=self.get_litellm_model_name(model.provider_resource_id), input=[interleaved_content_as_str(content) for content in contents], ) embeddings = [data["embedding"] for data in response["data"]] return EmbeddingsResponse(embeddings=embeddings) + async def openai_embeddings( + self, + model: str, + input: str | list[str], + encoding_format: str | None = "float", + dimensions: int | None = None, + user: str | None = None, + ) -> OpenAIEmbeddingsResponse: + model_obj = await self.model_store.get_model(model) + + # Convert input to list if it's a string + input_list = [input] if isinstance(input, str) else input + + # Call litellm embedding function + # litellm.drop_params = True + response = litellm.embedding( + model=self.get_litellm_model_name(model_obj.provider_resource_id), + input=input_list, + api_key=self.get_api_key(), + api_base=self.api_base, + dimensions=dimensions, + ) + + # Convert response to OpenAI format + data = [] + for i, embedding_data in enumerate(response["data"]): + # we encode to base64 if the encoding format is base64 in the request + if encoding_format == "base64": + byte_data = b"".join(struct.pack("f", f) for f in embedding_data["embedding"]) + embedding = base64.b64encode(byte_data).decode("utf-8") + else: + embedding = embedding_data["embedding"] + + data.append(OpenAIEmbeddingData(embedding=embedding, index=i)) + + usage = OpenAIEmbeddingUsage( + prompt_tokens=response["usage"]["prompt_tokens"], + total_tokens=response["usage"]["total_tokens"], + ) + + return OpenAIEmbeddingsResponse( + data=data, + model=model_obj.provider_resource_id, + usage=usage, + ) + async def openai_completion( self, model: str, - prompt: Union[str, List[str], List[int], List[List[int]]], - best_of: Optional[int] = None, - echo: Optional[bool] = None, - frequency_penalty: Optional[float] = None, - logit_bias: Optional[Dict[str, float]] = None, - logprobs: Optional[bool] = None, - max_tokens: Optional[int] = None, - n: Optional[int] = None, - presence_penalty: Optional[float] = None, - seed: Optional[int] = None, - stop: Optional[Union[str, List[str]]] = None, - stream: Optional[bool] = None, - stream_options: Optional[Dict[str, Any]] = None, - temperature: Optional[float] = None, - top_p: Optional[float] = None, - user: Optional[str] = None, - guided_choice: Optional[List[str]] = None, - prompt_logprobs: Optional[int] = None, + prompt: str | list[str] | list[int] | list[list[int]], + best_of: int | None = None, + echo: bool | None = None, + frequency_penalty: float | None = None, + logit_bias: dict[str, float] | None = None, + logprobs: bool | None = None, + max_tokens: int | None = None, + n: int | None = None, + presence_penalty: float | None = None, + seed: int | None = None, + stop: str | list[str] | None = None, + stream: bool | None = None, + stream_options: dict[str, Any] | None = None, + temperature: float | None = None, + top_p: float | None = None, + user: str | None = None, + guided_choice: list[str] | None = None, + prompt_logprobs: int | None = 
None, ) -> OpenAICompletion: - model_obj = await self._get_model(model) + model_obj = await self.model_store.get_model(model) params = await prepare_openai_completion_params( - model=model_obj.provider_resource_id, + model=self.get_litellm_model_name(model_obj.provider_resource_id), prompt=prompt, best_of=best_of, echo=echo, @@ -291,38 +358,40 @@ class LiteLLMOpenAIMixin( user=user, guided_choice=guided_choice, prompt_logprobs=prompt_logprobs, + api_key=self.get_api_key(), + api_base=self.api_base, ) - return litellm.text_completion(**params) + return await litellm.atext_completion(**params) async def openai_chat_completion( self, model: str, - messages: List[OpenAIMessageParam], - frequency_penalty: Optional[float] = None, - function_call: Optional[Union[str, Dict[str, Any]]] = None, - functions: Optional[List[Dict[str, Any]]] = None, - logit_bias: Optional[Dict[str, float]] = None, - logprobs: Optional[bool] = None, - max_completion_tokens: Optional[int] = None, - max_tokens: Optional[int] = None, - n: Optional[int] = None, - parallel_tool_calls: Optional[bool] = None, - presence_penalty: Optional[float] = None, - response_format: Optional[Dict[str, str]] = None, - seed: Optional[int] = None, - stop: Optional[Union[str, List[str]]] = None, - stream: Optional[bool] = None, - stream_options: Optional[Dict[str, Any]] = None, - temperature: Optional[float] = None, - tool_choice: Optional[Union[str, Dict[str, Any]]] = None, - tools: Optional[List[Dict[str, Any]]] = None, - top_logprobs: Optional[int] = None, - top_p: Optional[float] = None, - user: Optional[str] = None, - ) -> OpenAIChatCompletion: - model_obj = await self._get_model(model) + messages: list[OpenAIMessageParam], + frequency_penalty: float | None = None, + function_call: str | dict[str, Any] | None = None, + functions: list[dict[str, Any]] | None = None, + logit_bias: dict[str, float] | None = None, + logprobs: bool | None = None, + max_completion_tokens: int | None = None, + max_tokens: int | None = None, + n: int | None = None, + parallel_tool_calls: bool | None = None, + presence_penalty: float | None = None, + response_format: OpenAIResponseFormatParam | None = None, + seed: int | None = None, + stop: str | list[str] | None = None, + stream: bool | None = None, + stream_options: dict[str, Any] | None = None, + temperature: float | None = None, + tool_choice: str | dict[str, Any] | None = None, + tools: list[dict[str, Any]] | None = None, + top_logprobs: int | None = None, + top_p: float | None = None, + user: str | None = None, + ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]: + model_obj = await self.model_store.get_model(model) params = await prepare_openai_completion_params( - model=model_obj.provider_resource_id, + model=self.get_litellm_model_name(model_obj.provider_resource_id), messages=messages, frequency_penalty=frequency_penalty, function_call=function_call, @@ -345,27 +414,29 @@ class LiteLLMOpenAIMixin( top_logprobs=top_logprobs, top_p=top_p, user=user, + api_key=self.get_api_key(), + api_base=self.api_base, ) - return litellm.completion(**params) + return await litellm.acompletion(**params) async def batch_completion( self, model_id: str, - content_batch: List[InterleavedContent], - sampling_params: Optional[SamplingParams] = None, - response_format: Optional[ResponseFormat] = None, - logprobs: Optional[LogProbConfig] = None, + content_batch: list[InterleavedContent], + sampling_params: SamplingParams | None = None, + response_format: ResponseFormat | None = None, + logprobs: LogProbConfig 
| None = None, ): raise NotImplementedError("Batch completion is not supported for OpenAI Compat") async def batch_chat_completion( self, model_id: str, - messages_batch: List[List[Message]], - sampling_params: Optional[SamplingParams] = None, - tools: Optional[List[ToolDefinition]] = None, - tool_config: Optional[ToolConfig] = None, - response_format: Optional[ResponseFormat] = None, - logprobs: Optional[LogProbConfig] = None, + messages_batch: list[list[Message]], + sampling_params: SamplingParams | None = None, + tools: list[ToolDefinition] | None = None, + tool_config: ToolConfig | None = None, + response_format: ResponseFormat | None = None, + logprobs: LogProbConfig | None = None, ): raise NotImplementedError("Batch chat completion is not supported for OpenAI Compat") diff --git a/llama_stack/providers/utils/inference/model_registry.py b/llama_stack/providers/utils/inference/model_registry.py index 4d7063953..d707e36c2 100644 --- a/llama_stack/providers/utils/inference/model_registry.py +++ b/llama_stack/providers/utils/inference/model_registry.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict, List, Optional +from typing import Any from pydantic import BaseModel, Field @@ -20,13 +20,13 @@ from llama_stack.providers.utils.inference import ( # more closer to the Model class. class ProviderModelEntry(BaseModel): provider_model_id: str - aliases: List[str] = Field(default_factory=list) - llama_model: Optional[str] = None + aliases: list[str] = Field(default_factory=list) + llama_model: str | None = None model_type: ModelType = ModelType.llm - metadata: Dict[str, Any] = Field(default_factory=dict) + metadata: dict[str, Any] = Field(default_factory=dict) -def get_huggingface_repo(model_descriptor: str) -> Optional[str]: +def get_huggingface_repo(model_descriptor: str) -> str | None: for model in all_registered_models(): if model.descriptor() == model_descriptor: return model.huggingface_repo @@ -34,7 +34,7 @@ def get_huggingface_repo(model_descriptor: str) -> Optional[str]: def build_hf_repo_model_entry( - provider_model_id: str, model_descriptor: str, additional_aliases: Optional[List[str]] = None + provider_model_id: str, model_descriptor: str, additional_aliases: list[str] | None = None ) -> ProviderModelEntry: aliases = [ get_huggingface_repo(model_descriptor), @@ -58,7 +58,7 @@ def build_model_entry(provider_model_id: str, model_descriptor: str) -> Provider class ModelRegistryHelper(ModelsProtocolPrivate): - def __init__(self, model_entries: List[ProviderModelEntry]): + def __init__(self, model_entries: list[ProviderModelEntry]): self.alias_to_provider_id_map = {} self.provider_id_to_llama_model_map = {} for entry in model_entries: @@ -72,43 +72,53 @@ class ModelRegistryHelper(ModelsProtocolPrivate): self.alias_to_provider_id_map[entry.llama_model] = entry.provider_model_id self.provider_id_to_llama_model_map[entry.provider_model_id] = entry.llama_model - def get_provider_model_id(self, identifier: str) -> Optional[str]: + def get_provider_model_id(self, identifier: str) -> str | None: return self.alias_to_provider_id_map.get(identifier, None) - def get_llama_model(self, provider_model_id: str) -> Optional[str]: + # TODO: why keep a separate llama model mapping? 
+ def get_llama_model(self, provider_model_id: str) -> str | None: return self.provider_id_to_llama_model_map.get(provider_model_id, None) async def register_model(self, model: Model) -> Model: + if not (supported_model_id := self.get_provider_model_id(model.provider_resource_id)): + raise ValueError( + f"Model '{model.provider_resource_id}' is not supported. Supported models are: {', '.join(self.alias_to_provider_id_map.keys())}" + ) + provider_resource_id = self.get_provider_model_id(model.model_id) if model.model_type == ModelType.embedding: # embedding models are always registered by their provider model id and does not need to be mapped to a llama model provider_resource_id = model.provider_resource_id - else: - provider_resource_id = self.get_provider_model_id(model.provider_resource_id) - if provider_resource_id: - model.provider_resource_id = provider_resource_id + if provider_resource_id != supported_model_id: # be idemopotent, only reject differences + raise ValueError( + f"Model id '{model.model_id}' is already registered. Please use a different id or unregister it first." + ) else: llama_model = model.metadata.get("llama_model") - if llama_model is None: - return model + if llama_model: + existing_llama_model = self.get_llama_model(model.provider_resource_id) + if existing_llama_model: + if existing_llama_model != llama_model: + raise ValueError( + f"Provider model id '{model.provider_resource_id}' is already registered to a different llama model: '{existing_llama_model}'" + ) + else: + if llama_model not in ALL_HUGGINGFACE_REPOS_TO_MODEL_DESCRIPTOR: + raise ValueError( + f"Invalid llama_model '{llama_model}' specified in metadata. " + f"Must be one of: {', '.join(ALL_HUGGINGFACE_REPOS_TO_MODEL_DESCRIPTOR.keys())}" + ) + self.provider_id_to_llama_model_map[model.provider_resource_id] = ( + ALL_HUGGINGFACE_REPOS_TO_MODEL_DESCRIPTOR[llama_model] + ) - existing_llama_model = self.get_llama_model(model.provider_resource_id) - if existing_llama_model: - if existing_llama_model != llama_model: - raise ValueError( - f"Provider model id '{model.provider_resource_id}' is already registered to a different llama model: '{existing_llama_model}'" - ) - else: - if llama_model not in ALL_HUGGINGFACE_REPOS_TO_MODEL_DESCRIPTOR: - raise ValueError( - f"Invalid llama_model '{llama_model}' specified in metadata. " - f"Must be one of: {', '.join(ALL_HUGGINGFACE_REPOS_TO_MODEL_DESCRIPTOR.keys())}" - ) - self.provider_id_to_llama_model_map[model.provider_resource_id] = ( - ALL_HUGGINGFACE_REPOS_TO_MODEL_DESCRIPTOR[llama_model] - ) + self.alias_to_provider_id_map[model.model_id] = supported_model_id return model async def unregister_model(self, model_id: str) -> None: - pass + # TODO: should we block unregistering base supported provider model IDs? 
+ if model_id not in self.alias_to_provider_id_map: + raise ValueError(f"Model id '{model_id}' is not registered.") + + del self.alias_to_provider_id_map[model_id] diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/llama_stack/providers/utils/inference/openai_compat.py index f33cb4443..049f06fdb 100644 --- a/llama_stack/providers/utils/inference/openai_compat.py +++ b/llama_stack/providers/utils/inference/openai_compat.py @@ -8,7 +8,10 @@ import logging import time import uuid import warnings -from typing import Any, AsyncGenerator, Dict, Iterable, List, Optional, Union +from collections.abc import AsyncGenerator, AsyncIterator, Awaitable, Iterable +from typing import ( + Any, +) from openai import AsyncStream from openai.types.chat import ( @@ -50,6 +53,18 @@ from openai.types.chat.chat_completion import ( from openai.types.chat.chat_completion import ( ChoiceLogprobs as OpenAIChoiceLogprobs, # same as chat_completion_chunk ChoiceLogprobs ) +from openai.types.chat.chat_completion_chunk import ( + Choice as OpenAIChatCompletionChunkChoice, +) +from openai.types.chat.chat_completion_chunk import ( + ChoiceDelta as OpenAIChoiceDelta, +) +from openai.types.chat.chat_completion_chunk import ( + ChoiceDeltaToolCall as OpenAIChoiceDeltaToolCall, +) +from openai.types.chat.chat_completion_chunk import ( + ChoiceDeltaToolCallFunction as OpenAIChoiceDeltaToolCallFunction, +) from openai.types.chat.chat_completion_content_part_image_param import ( ImageURL as OpenAIImageURL, ) @@ -59,12 +74,14 @@ from openai.types.chat.chat_completion_message_tool_call_param import ( from pydantic import BaseModel from llama_stack.apis.common.content_types import ( + URL, ImageContentItem, InterleavedContent, TextContentItem, TextDelta, ToolCallDelta, ToolCallParseStatus, + _URLOrData, ) from llama_stack.apis.inference import ( ChatCompletionRequest, @@ -80,17 +97,30 @@ from llama_stack.apis.inference import ( SamplingParams, SystemMessage, TokenLogProbs, + ToolChoice, ToolResponseMessage, TopKSamplingStrategy, TopPSamplingStrategy, UserMessage, ) -from llama_stack.apis.inference.inference import OpenAIChatCompletion, OpenAICompletion, OpenAICompletionChoice +from llama_stack.apis.inference.inference import ( + JsonSchemaResponseFormat, + OpenAIChatCompletion, + OpenAICompletion, + OpenAICompletionChoice, + OpenAIMessageParam, + OpenAIResponseFormatParam, + ToolConfig, +) +from llama_stack.apis.inference.inference import ( + OpenAIChoice as OpenAIChatCompletionChoice, +) from llama_stack.models.llama.datatypes import ( BuiltinTool, StopReason, ToolCall, ToolDefinition, + ToolParamDefinition, ) from llama_stack.providers.utils.inference.prompt_adapter import ( convert_image_content_to_url, @@ -105,24 +135,24 @@ class OpenAICompatCompletionChoiceDelta(BaseModel): class OpenAICompatLogprobs(BaseModel): - text_offset: Optional[List[int]] = None + text_offset: list[int] | None = None - token_logprobs: Optional[List[float]] = None + token_logprobs: list[float] | None = None - tokens: Optional[List[str]] = None + tokens: list[str] | None = None - top_logprobs: Optional[List[Dict[str, float]]] = None + top_logprobs: list[dict[str, float]] | None = None class OpenAICompatCompletionChoice(BaseModel): - finish_reason: Optional[str] = None - text: Optional[str] = None - delta: Optional[OpenAICompatCompletionChoiceDelta] = None - logprobs: Optional[OpenAICompatLogprobs] = None + finish_reason: str | None = None + text: str | None = None + delta: OpenAICompatCompletionChoiceDelta | None = None + logprobs: 
OpenAICompatLogprobs | None = None class OpenAICompatCompletionResponse(BaseModel): - choices: List[OpenAICompatCompletionChoice] + choices: list[OpenAICompatCompletionChoice] def get_sampling_strategy_options(params: SamplingParams) -> dict: @@ -181,8 +211,8 @@ def get_stop_reason(finish_reason: str) -> StopReason: def convert_openai_completion_logprobs( - logprobs: Optional[OpenAICompatLogprobs], -) -> Optional[List[TokenLogProbs]]: + logprobs: OpenAICompatLogprobs | None, +) -> list[TokenLogProbs] | None: if not logprobs: return None if hasattr(logprobs, "top_logprobs"): @@ -199,7 +229,7 @@ def convert_openai_completion_logprobs( return None -def convert_openai_completion_logprobs_stream(text: str, logprobs: Optional[Union[float, OpenAICompatLogprobs]]): +def convert_openai_completion_logprobs_stream(text: str, logprobs: float | OpenAICompatLogprobs | None): if logprobs is None: return None if isinstance(logprobs, float): @@ -488,11 +518,37 @@ async def convert_message_to_openai_dict(message: Message, download: bool = Fals else: content = [await _convert_content(message.content)] - return { + result = { "role": message.role, "content": content, } + if hasattr(message, "tool_calls") and message.tool_calls: + result["tool_calls"] = [] + for tc in message.tool_calls: + # The tool.tool_name can be a str or a BuiltinTool enum. If + # it's the latter, convert to a string. + tool_name = tc.tool_name + if isinstance(tool_name, BuiltinTool): + tool_name = tool_name.value + + # arguments_json can be None, so attempt it first and fall back to arguments + if hasattr(tc, "arguments_json") and tc.arguments_json: + arguments = tc.arguments_json + else: + arguments = json.dumps(tc.arguments) + result["tool_calls"].append( + { + "id": tc.call_id, + "type": "function", + "function": { + "name": tool_name, + "arguments": arguments, + }, + } + ) + return result + class UnparseableToolCall(BaseModel): """ @@ -506,7 +562,7 @@ class UnparseableToolCall(BaseModel): async def convert_message_to_openai_dict_new( - message: Message | Dict, + message: Message | dict, ) -> OpenAIChatCompletionMessage: """ Convert a Message to an OpenAI API-compatible dictionary. @@ -535,14 +591,10 @@ async def convert_message_to_openai_dict_new( # List[...] -> List[...] async def _convert_message_content( content: InterleavedContent, - ) -> Union[str, Iterable[OpenAIChatCompletionContentPartParam]]: + ) -> str | Iterable[OpenAIChatCompletionContentPartParam]: async def impl( content_: InterleavedContent, - ) -> Union[ - str, - OpenAIChatCompletionContentPartParam, - List[OpenAIChatCompletionContentPartParam], - ]: + ) -> str | OpenAIChatCompletionContentPartParam | list[OpenAIChatCompletionContentPartParam]: # Llama Stack and OpenAI spec match for str and text input if isinstance(content_, str): return content_ @@ -589,7 +641,7 @@ async def convert_message_to_openai_dict_new( ] params = {} if tool_calls: - params = {"tool_calls": tool_calls} + params["tool_calls"] = tool_calls out = OpenAIChatCompletionAssistantMessage( role="assistant", content=await _convert_message_content(message.content), @@ -614,7 +666,7 @@ async def convert_message_to_openai_dict_new( def convert_tool_call( tool_call: ChatCompletionMessageToolCall, -) -> Union[ToolCall, UnparseableToolCall]: +) -> ToolCall | UnparseableToolCall: """ Convert a ChatCompletionMessageToolCall tool call to either a ToolCall or UnparseableToolCall. 
Returns an UnparseableToolCall @@ -670,7 +722,10 @@ def to_openai_param_type(param_type: str) -> dict: if param_type.startswith("list[") and param_type.endswith("]"): inner_type = param_type[5:-1] if inner_type in basic_types: - return {"type": "array", "items": {"type": basic_types.get(inner_type, inner_type)}} + return { + "type": "array", + "items": {"type": basic_types.get(inner_type, inner_type)}, + } return {"type": param_type} @@ -751,6 +806,17 @@ def convert_tooldef_to_openai_tool(tool: ToolDefinition) -> dict: return out +def _convert_stop_reason_to_openai_finish_reason(stop_reason: StopReason) -> str: + """ + Convert a StopReason to an OpenAI chat completion finish_reason. + """ + return { + StopReason.end_of_turn: "stop", + StopReason.end_of_message: "tool_calls", + StopReason.out_of_tokens: "length", + }.get(stop_reason, "stop") + + def _convert_openai_finish_reason(finish_reason: str) -> StopReason: """ Convert an OpenAI chat completion finish_reason to a StopReason. @@ -776,9 +842,65 @@ def _convert_openai_finish_reason(finish_reason: str) -> StopReason: }.get(finish_reason, StopReason.end_of_turn) +def _convert_openai_request_tool_config(tool_choice: str | dict[str, Any] | None = None) -> ToolConfig: + tool_config = ToolConfig() + if tool_choice: + try: + tool_choice = ToolChoice(tool_choice) + except ValueError: + pass + tool_config.tool_choice = tool_choice + return tool_config + + +def _convert_openai_request_tools(tools: list[dict[str, Any]] | None = None) -> list[ToolDefinition]: + lls_tools = [] + if not tools: + return lls_tools + + for tool in tools: + tool_fn = tool.get("function", {}) + tool_name = tool_fn.get("name", None) + tool_desc = tool_fn.get("description", None) + + tool_params = tool_fn.get("parameters", None) + lls_tool_params = {} + if tool_params is not None: + tool_param_properties = tool_params.get("properties", {}) + for tool_param_key, tool_param_value in tool_param_properties.items(): + tool_param_def = ToolParamDefinition( + param_type=str(tool_param_value.get("type", None)), + description=tool_param_value.get("description", None), + ) + lls_tool_params[tool_param_key] = tool_param_def + + lls_tool = ToolDefinition( + tool_name=tool_name, + description=tool_desc, + parameters=lls_tool_params, + ) + lls_tools.append(lls_tool) + return lls_tools + + +def _convert_openai_request_response_format( + response_format: OpenAIResponseFormatParam = None, +): + if not response_format: + return None + # response_format can be a dict or a pydantic model + response_format = dict(response_format) + if response_format.get("type", "") == "json_schema": + return JsonSchemaResponseFormat( + type="json_schema", + json_schema=response_format.get("json_schema", {}).get("schema", ""), + ) + return None + + def _convert_openai_tool_calls( - tool_calls: List[OpenAIChatCompletionMessageToolCall], -) -> List[ToolCall]: + tool_calls: list[OpenAIChatCompletionMessageToolCall], +) -> list[ToolCall]: """ Convert an OpenAI ChatCompletionMessageToolCall list into a list of ToolCall. @@ -814,7 +936,7 @@ def _convert_openai_tool_calls( def _convert_openai_logprobs( logprobs: OpenAIChoiceLogprobs, -) -> Optional[List[TokenLogProbs]]: +) -> list[TokenLogProbs] | None: """ Convert an OpenAI ChoiceLogprobs into a list of TokenLogProbs. 
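For reference, `_convert_openai_request_tools` above consumes the standard OpenAI function-tool dict shape; a short sketch of the conversion for a single made-up tool:

```python
from llama_stack.providers.utils.inference.openai_compat import (
    _convert_openai_request_tools,
)

openai_tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Look up the current weather for a city",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {"type": "string", "description": "City name"},
                },
            },
        },
    }
]

[tool_def] = _convert_openai_request_tools(openai_tools)
# tool_def.tool_name == "get_weather"
# tool_def.parameters["city"].param_type == "string"
```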
@@ -847,9 +969,9 @@ def _convert_openai_logprobs( def _convert_openai_sampling_params( - max_tokens: Optional[int] = None, - temperature: Optional[float] = None, - top_p: Optional[float] = None, + max_tokens: int | None = None, + temperature: float | None = None, + top_p: float | None = None, ) -> SamplingParams: sampling_params = SamplingParams() @@ -871,6 +993,52 @@ def _convert_openai_sampling_params( return sampling_params +def openai_messages_to_messages( + messages: list[OpenAIMessageParam], +) -> list[Message]: + """ + Convert a list of OpenAIChatCompletionMessage into a list of Message. + """ + converted_messages = [] + for message in messages: + if message.role == "system": + converted_message = SystemMessage(content=openai_content_to_content(message.content)) + elif message.role == "user": + converted_message = UserMessage(content=openai_content_to_content(message.content)) + elif message.role == "assistant": + converted_message = CompletionMessage( + content=openai_content_to_content(message.content), + tool_calls=_convert_openai_tool_calls(message.tool_calls), + stop_reason=StopReason.end_of_turn, + ) + elif message.role == "tool": + converted_message = ToolResponseMessage( + role="tool", + call_id=message.tool_call_id, + content=openai_content_to_content(message.content), + ) + else: + raise ValueError(f"Unknown role {message.role}") + converted_messages.append(converted_message) + return converted_messages + + +def openai_content_to_content(content: str | Iterable[OpenAIChatCompletionContentPartParam]): + if isinstance(content, str): + return content + elif isinstance(content, list): + return [openai_content_to_content(c) for c in content] + elif hasattr(content, "type"): + if content.type == "text": + return TextContentItem(type="text", text=content.text) + elif content.type == "image_url": + return ImageContentItem(type="image", image=_URLOrData(url=URL(uri=content.image_url.url))) + else: + raise ValueError(f"Unknown content type: {content.type}") + else: + raise ValueError(f"Unknown content type: {content}") + + def convert_openai_chat_completion_choice( choice: OpenAIChoice, ) -> ChatCompletionResponse: @@ -1080,32 +1248,45 @@ async def convert_openai_chat_completion_stream( async def prepare_openai_completion_params(**params): - completion_params = {k: v for k, v in params.items() if v is not None} + async def _prepare_value(value: Any) -> Any: + new_value = value + if isinstance(value, list): + new_value = [await _prepare_value(v) for v in value] + elif isinstance(value, dict): + new_value = {k: await _prepare_value(v) for k, v in value.items()} + elif isinstance(value, BaseModel): + new_value = value.model_dump(exclude_none=True) + return new_value + + completion_params = {} + for k, v in params.items(): + if v is not None: + completion_params[k] = await _prepare_value(v) return completion_params -class OpenAICompletionUnsupportedMixin: +class OpenAICompletionToLlamaStackMixin: async def openai_completion( self, model: str, - prompt: Union[str, List[str], List[int], List[List[int]]], - best_of: Optional[int] = None, - echo: Optional[bool] = None, - frequency_penalty: Optional[float] = None, - logit_bias: Optional[Dict[str, float]] = None, - logprobs: Optional[bool] = None, - max_tokens: Optional[int] = None, - n: Optional[int] = None, - presence_penalty: Optional[float] = None, - seed: Optional[int] = None, - stop: Optional[Union[str, List[str]]] = None, - stream: Optional[bool] = None, - stream_options: Optional[Dict[str, Any]] = None, - temperature: Optional[float] 
= None, - top_p: Optional[float] = None, - user: Optional[str] = None, - guided_choice: Optional[List[str]] = None, - prompt_logprobs: Optional[int] = None, + prompt: str | list[str] | list[int] | list[list[int]], + best_of: int | None = None, + echo: bool | None = None, + frequency_penalty: float | None = None, + logit_bias: dict[str, float] | None = None, + logprobs: bool | None = None, + max_tokens: int | None = None, + n: int | None = None, + presence_penalty: float | None = None, + seed: int | None = None, + stop: str | list[str] | None = None, + stream: bool | None = None, + stream_options: dict[str, Any] | None = None, + temperature: float | None = None, + top_p: float | None = None, + user: str | None = None, + guided_choice: list[str] | None = None, + prompt_logprobs: int | None = None, ) -> OpenAICompletion: if stream: raise ValueError(f"{self.__class__.__name__} doesn't support streaming openai completions") @@ -1122,6 +1303,7 @@ class OpenAICompletionUnsupportedMixin: choices = [] # "n" is the number of completions to generate per prompt + n = n or 1 for _i in range(0, n): # and we may have multiple prompts, if batching was used @@ -1134,7 +1316,7 @@ class OpenAICompletionUnsupportedMixin: index = len(choices) text = result.content - finish_reason = _convert_openai_finish_reason(result.stop_reason) + finish_reason = _convert_stop_reason_to_openai_finish_reason(result.stop_reason) choice = OpenAICompletionChoice( index=index, @@ -1152,31 +1334,152 @@ class OpenAICompletionUnsupportedMixin: ) -class OpenAIChatCompletionUnsupportedMixin: +class OpenAIChatCompletionToLlamaStackMixin: async def openai_chat_completion( self, model: str, - messages: List[OpenAIChatCompletionMessage], - frequency_penalty: Optional[float] = None, - function_call: Optional[Union[str, Dict[str, Any]]] = None, - functions: Optional[List[Dict[str, Any]]] = None, - logit_bias: Optional[Dict[str, float]] = None, - logprobs: Optional[bool] = None, - max_completion_tokens: Optional[int] = None, - max_tokens: Optional[int] = None, - n: Optional[int] = None, - parallel_tool_calls: Optional[bool] = None, - presence_penalty: Optional[float] = None, - response_format: Optional[Dict[str, str]] = None, - seed: Optional[int] = None, - stop: Optional[Union[str, List[str]]] = None, - stream: Optional[bool] = None, - stream_options: Optional[Dict[str, Any]] = None, - temperature: Optional[float] = None, - tool_choice: Optional[Union[str, Dict[str, Any]]] = None, - tools: Optional[List[Dict[str, Any]]] = None, - top_logprobs: Optional[int] = None, - top_p: Optional[float] = None, - user: Optional[str] = None, + messages: list[OpenAIMessageParam], + frequency_penalty: float | None = None, + function_call: str | dict[str, Any] | None = None, + functions: list[dict[str, Any]] | None = None, + logit_bias: dict[str, float] | None = None, + logprobs: bool | None = None, + max_completion_tokens: int | None = None, + max_tokens: int | None = None, + n: int | None = None, + parallel_tool_calls: bool | None = None, + presence_penalty: float | None = None, + response_format: OpenAIResponseFormatParam | None = None, + seed: int | None = None, + stop: str | list[str] | None = None, + stream: bool | None = None, + stream_options: dict[str, Any] | None = None, + temperature: float | None = None, + tool_choice: str | dict[str, Any] | None = None, + tools: list[dict[str, Any]] | None = None, + top_logprobs: int | None = None, + top_p: float | None = None, + user: str | None = None, + ) -> OpenAIChatCompletion | 
AsyncIterator[OpenAIChatCompletionChunk]: + messages = openai_messages_to_messages(messages) + response_format = _convert_openai_request_response_format(response_format) + sampling_params = _convert_openai_sampling_params( + max_tokens=max_tokens, + temperature=temperature, + top_p=top_p, + ) + tool_config = _convert_openai_request_tool_config(tool_choice) + + tools = _convert_openai_request_tools(tools) + if tool_config.tool_choice == ToolChoice.none: + tools = [] + + outstanding_responses = [] + # "n" is the number of completions to generate per prompt + n = n or 1 + for _i in range(0, n): + response = self.chat_completion( + model_id=model, + messages=messages, + sampling_params=sampling_params, + response_format=response_format, + stream=stream, + tool_config=tool_config, + tools=tools, + ) + outstanding_responses.append(response) + + if stream: + return OpenAIChatCompletionToLlamaStackMixin._process_stream_response(self, model, outstanding_responses) + + return await OpenAIChatCompletionToLlamaStackMixin._process_non_stream_response( + self, model, outstanding_responses + ) + + async def _process_stream_response( + self, + model: str, + outstanding_responses: list[Awaitable[AsyncIterator[ChatCompletionResponseStreamChunk]]], + ): + id = f"chatcmpl-{uuid.uuid4()}" + for i, outstanding_response in enumerate(outstanding_responses): + response = await outstanding_response + async for chunk in response: + event = chunk.event + finish_reason = _convert_stop_reason_to_openai_finish_reason(event.stop_reason) + + if isinstance(event.delta, TextDelta): + text_delta = event.delta.text + delta = OpenAIChoiceDelta(content=text_delta) + yield OpenAIChatCompletionChunk( + id=id, + choices=[OpenAIChatCompletionChunkChoice(index=i, finish_reason=finish_reason, delta=delta)], + created=int(time.time()), + model=model, + object="chat.completion.chunk", + ) + elif isinstance(event.delta, ToolCallDelta): + if event.delta.parse_status == ToolCallParseStatus.succeeded: + tool_call = event.delta.tool_call + + # First chunk includes full structure + openai_tool_call = OpenAIChoiceDeltaToolCall( + index=0, + id=tool_call.call_id, + function=OpenAIChoiceDeltaToolCallFunction( + name=tool_call.tool_name, + arguments="", + ), + ) + delta = OpenAIChoiceDelta(tool_calls=[openai_tool_call]) + yield OpenAIChatCompletionChunk( + id=id, + choices=[ + OpenAIChatCompletionChunkChoice(index=i, finish_reason=finish_reason, delta=delta) + ], + created=int(time.time()), + model=model, + object="chat.completion.chunk", + ) + # arguments + openai_tool_call = OpenAIChoiceDeltaToolCall( + index=0, + function=OpenAIChoiceDeltaToolCallFunction( + arguments=tool_call.arguments_json, + ), + ) + delta = OpenAIChoiceDelta(tool_calls=[openai_tool_call]) + yield OpenAIChatCompletionChunk( + id=id, + choices=[ + OpenAIChatCompletionChunkChoice(index=i, finish_reason=finish_reason, delta=delta) + ], + created=int(time.time()), + model=model, + object="chat.completion.chunk", + ) + + async def _process_non_stream_response( + self, model: str, outstanding_responses: list[Awaitable[ChatCompletionResponse]] ) -> OpenAIChatCompletion: - raise ValueError(f"{self.__class__.__name__} doesn't support openai chat completion") + choices = [] + for outstanding_response in outstanding_responses: + response = await outstanding_response + completion_message = response.completion_message + message = await convert_message_to_openai_dict_new(completion_message) + finish_reason = _convert_stop_reason_to_openai_finish_reason(completion_message.stop_reason) + 
+ choice = OpenAIChatCompletionChoice( + index=len(choices), + message=message, + finish_reason=finish_reason, + ) + choices.append(choice) + + return OpenAIChatCompletion( + id=f"chatcmpl-{uuid.uuid4()}", + choices=choices, + created=int(time.time()), + model=model, + object="chat.completion", + ) diff --git a/llama_stack/providers/utils/inference/prompt_adapter.py b/llama_stack/providers/utils/inference/prompt_adapter.py index 4f9c4927a..56e33cfdf 100644 --- a/llama_stack/providers/utils/inference/prompt_adapter.py +++ b/llama_stack/providers/utils/inference/prompt_adapter.py @@ -9,7 +9,6 @@ import base64 import io import json import re -from typing import List, Optional, Tuple, Union import httpx from PIL import Image as PIL_Image @@ -52,6 +51,9 @@ from llama_stack.models.llama.llama3.prompt_templates import ( SystemDefaultGenerator, ) from llama_stack.models.llama.llama3.tokenizer import Tokenizer +from llama_stack.models.llama.llama4.prompt_templates.system_prompts import ( + PythonListCustomToolGenerator as PythonListCustomToolGeneratorLlama4, +) from llama_stack.models.llama.sku_list import resolve_model from llama_stack.models.llama.sku_types import ModelFamily, is_multimodal from llama_stack.providers.utils.inference import supported_inference_models @@ -60,7 +62,7 @@ log = get_logger(name=__name__, category="inference") class ChatCompletionRequestWithRawContent(ChatCompletionRequest): - messages: List[RawMessage] + messages: list[RawMessage] class CompletionRequestWithRawContent(CompletionRequest): @@ -90,8 +92,8 @@ def interleaved_content_as_str(content: InterleavedContent, sep: str = " ") -> s async def convert_request_to_raw( - request: Union[ChatCompletionRequest, CompletionRequest], -) -> Union[ChatCompletionRequestWithRawContent, CompletionRequestWithRawContent]: + request: ChatCompletionRequest | CompletionRequest, +) -> ChatCompletionRequestWithRawContent | CompletionRequestWithRawContent: if isinstance(request, ChatCompletionRequest): messages = [] for m in request.messages: @@ -167,18 +169,18 @@ def content_has_media(content: InterleavedContent): return _has_media_content(content) -def messages_have_media(messages: List[Message]): +def messages_have_media(messages: list[Message]): return any(content_has_media(m.content) for m in messages) -def request_has_media(request: Union[ChatCompletionRequest, CompletionRequest]): +def request_has_media(request: ChatCompletionRequest | CompletionRequest): if isinstance(request, ChatCompletionRequest): return messages_have_media(request.messages) else: return content_has_media(request.content) -async def localize_image_content(media: ImageContentItem) -> Tuple[bytes, str]: +async def localize_image_content(media: ImageContentItem) -> tuple[bytes, str]: image = media.image if image.url and image.url.uri.startswith("http"): async with httpx.AsyncClient() as client: @@ -225,7 +227,7 @@ async def completion_request_to_prompt(request: CompletionRequest) -> str: async def completion_request_to_prompt_model_input_info( request: CompletionRequest, -) -> Tuple[str, int]: +) -> tuple[str, int]: content = augment_content_with_response_format_prompt(request.response_format, request.content) request.content = content request = await convert_request_to_raw(request) @@ -262,7 +264,7 @@ async def chat_completion_request_to_prompt(request: ChatCompletionRequest, llam async def chat_completion_request_to_model_input_info( request: ChatCompletionRequest, llama_model: str -) -> Tuple[str, int]: +) -> tuple[str, int]: messages = 
chat_completion_request_to_messages(request, llama_model) request.messages = messages request = await convert_request_to_raw(request) @@ -281,7 +283,7 @@ async def chat_completion_request_to_model_input_info( def chat_completion_request_to_messages( request: ChatCompletionRequest, llama_model: str, -) -> List[Message]: +) -> list[Message]: """Reads chat completion request and augments the messages to handle tools. For eg. for llama_3_1, add system message with the appropriate tools or add user messsage for custom tools, etc. @@ -306,10 +308,11 @@ def chat_completion_request_to_messages( elif model.model_family in ( ModelFamily.llama3_2, ModelFamily.llama3_3, - ModelFamily.llama4, ): - # llama3.2, llama3.3 and llama4 models follow the same tool prompt format - messages = augment_messages_for_tools_llama_3_2(request) + # llama3.2, llama3.3 follow the same tool prompt format + messages = augment_messages_for_tools_llama(request, PythonListCustomToolGenerator) + elif model.model_family == ModelFamily.llama4: + messages = augment_messages_for_tools_llama(request, PythonListCustomToolGeneratorLlama4) else: messages = request.messages @@ -319,7 +322,7 @@ def chat_completion_request_to_messages( return messages -def response_format_prompt(fmt: Optional[ResponseFormat]): +def response_format_prompt(fmt: ResponseFormat | None): if not fmt: return None @@ -333,7 +336,7 @@ def response_format_prompt(fmt: Optional[ResponseFormat]): def augment_messages_for_tools_llama_3_1( request: ChatCompletionRequest, -) -> List[Message]: +) -> list[Message]: existing_messages = request.messages existing_system_message = None if existing_messages[0].role == Role.system.value: @@ -379,7 +382,7 @@ def augment_messages_for_tools_llama_3_1( messages.append(SystemMessage(content=sys_content)) - has_custom_tools = any(isinstance(dfn.tool_name, str) for dfn in request.tools) + has_custom_tools = request.tools is not None and any(isinstance(dfn.tool_name, str) for dfn in request.tools) if has_custom_tools: fmt = request.tool_config.tool_prompt_format or ToolPromptFormat.json if fmt == ToolPromptFormat.json: @@ -399,9 +402,10 @@ def augment_messages_for_tools_llama_3_1( return messages -def augment_messages_for_tools_llama_3_2( +def augment_messages_for_tools_llama( request: ChatCompletionRequest, -) -> List[Message]: + custom_tool_prompt_generator, +) -> list[Message]: existing_messages = request.messages existing_system_message = None if existing_messages[0].role == Role.system.value: @@ -434,7 +438,7 @@ def augment_messages_for_tools_llama_3_2( if existing_system_message and request.tool_config.system_message_behavior == SystemMessageBehavior.replace: system_prompt = existing_system_message.content - tool_template = PythonListCustomToolGenerator().gen(custom_tools, system_prompt) + tool_template = custom_tool_prompt_generator().gen(custom_tools, system_prompt) sys_content += tool_template.render() sys_content += "\n" @@ -452,7 +456,7 @@ def augment_messages_for_tools_llama_3_2( return messages -def _get_tool_choice_prompt(tool_choice: ToolChoice | str, tools: List[ToolDefinition]) -> str: +def _get_tool_choice_prompt(tool_choice: ToolChoice | str, tools: list[ToolDefinition]) -> str: if tool_choice == ToolChoice.auto: return "" elif tool_choice == ToolChoice.required: diff --git a/llama_stack/providers/utils/inference/stream_utils.py b/llama_stack/providers/utils/inference/stream_utils.py new file mode 100644 index 000000000..a2edbb9c8 --- /dev/null +++ b/llama_stack/providers/utils/inference/stream_utils.py @@ -0,0 
+1,129 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from collections.abc import AsyncIterator +from datetime import datetime, timezone +from typing import Any + +from llama_stack.apis.inference import ( + OpenAIAssistantMessageParam, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAIChatCompletionToolCall, + OpenAIChatCompletionToolCallFunction, + OpenAIChoice, + OpenAIChoiceLogprobs, + OpenAIMessageParam, +) +from llama_stack.providers.utils.inference.inference_store import InferenceStore + + +async def stream_and_store_openai_completion( + provider_stream: AsyncIterator[OpenAIChatCompletionChunk], + model: str, + store: InferenceStore, + input_messages: list[OpenAIMessageParam], +) -> AsyncIterator[OpenAIChatCompletionChunk]: + """ + Wraps a provider's stream, yields chunks, and stores the full completion at the end. + """ + id = None + created = None + choices_data: dict[int, dict[str, Any]] = {} + + try: + async for chunk in provider_stream: + if id is None and chunk.id: + id = chunk.id + if created is None and chunk.created: + created = chunk.created + + if chunk.choices: + for choice_delta in chunk.choices: + idx = choice_delta.index + if idx not in choices_data: + choices_data[idx] = { + "content_parts": [], + "tool_calls_builder": {}, + "finish_reason": None, + "logprobs_content_parts": [], + } + current_choice_data = choices_data[idx] + + if choice_delta.delta: + delta = choice_delta.delta + if delta.content: + current_choice_data["content_parts"].append(delta.content) + if delta.tool_calls: + for tool_call_delta in delta.tool_calls: + tc_idx = tool_call_delta.index + if tc_idx not in current_choice_data["tool_calls_builder"]: + # Initialize with correct structure for _ToolCallBuilderData + current_choice_data["tool_calls_builder"][tc_idx] = { + "id": None, + "type": "function", + "function_name_parts": [], + "function_arguments_parts": [], + } + builder = current_choice_data["tool_calls_builder"][tc_idx] + if tool_call_delta.id: + builder["id"] = tool_call_delta.id + if tool_call_delta.type: + builder["type"] = tool_call_delta.type + if tool_call_delta.function: + if tool_call_delta.function.name: + builder["function_name_parts"].append(tool_call_delta.function.name) + if tool_call_delta.function.arguments: + builder["function_arguments_parts"].append(tool_call_delta.function.arguments) + if choice_delta.finish_reason: + current_choice_data["finish_reason"] = choice_delta.finish_reason + if choice_delta.logprobs and choice_delta.logprobs.content: + # Ensure that we are extending with the correct type + current_choice_data["logprobs_content_parts"].extend(choice_delta.logprobs.content) + yield chunk + finally: + if id: + assembled_choices: list[OpenAIChoice] = [] + for choice_idx, choice_data in choices_data.items(): + content_str = "".join(choice_data["content_parts"]) + assembled_tool_calls: list[OpenAIChatCompletionToolCall] = [] + if choice_data["tool_calls_builder"]: + for tc_build_data in choice_data["tool_calls_builder"].values(): + if tc_build_data["id"]: + func_name = "".join(tc_build_data["function_name_parts"]) + func_args = "".join(tc_build_data["function_arguments_parts"]) + assembled_tool_calls.append( + OpenAIChatCompletionToolCall( + id=tc_build_data["id"], + type=tc_build_data["type"], # No or "function" needed, already set + function=OpenAIChatCompletionToolCallFunction(name=func_name, 
arguments=func_args), + ) + ) + message = OpenAIAssistantMessageParam( + role="assistant", + content=content_str if content_str else None, + tool_calls=assembled_tool_calls if assembled_tool_calls else None, + ) + logprobs_content = choice_data["logprobs_content_parts"] + final_logprobs = OpenAIChoiceLogprobs(content=logprobs_content) if logprobs_content else None + + assembled_choices.append( + OpenAIChoice( + finish_reason=choice_data["finish_reason"], + index=choice_idx, + message=message, + logprobs=final_logprobs, + ) + ) + + final_response = OpenAIChatCompletion( + id=id, + choices=assembled_choices, + created=created or int(datetime.now(timezone.utc).timestamp()), + model=model, + object="chat.completion", + ) + await store.store_chat_completion(final_response, input_messages) diff --git a/llama_stack/providers/utils/kvstore/api.py b/llama_stack/providers/utils/kvstore/api.py index 84b1730e1..d17dc66e1 100644 --- a/llama_stack/providers/utils/kvstore/api.py +++ b/llama_stack/providers/utils/kvstore/api.py @@ -5,15 +5,17 @@ # the root directory of this source tree. from datetime import datetime -from typing import List, Optional, Protocol +from typing import Protocol class KVStore(Protocol): # TODO: make the value type bytes instead of str - async def set(self, key: str, value: str, expiration: Optional[datetime] = None) -> None: ... + async def set(self, key: str, value: str, expiration: datetime | None = None) -> None: ... - async def get(self, key: str) -> Optional[str]: ... + async def get(self, key: str) -> str | None: ... async def delete(self, key: str) -> None: ... - async def range(self, start_key: str, end_key: str) -> List[str]: ... + async def values_in_range(self, start_key: str, end_key: str) -> list[str]: ... + + async def keys_in_range(self, start_key: str, end_key: str) -> list[str]: ... 
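Note: the following is an illustrative usage sketch of the renamed KVStore range API above (values_in_range replaces the old range(), keys_in_range is new); it is not part of the patch. It assumes the async kvstore_impl factory and the SqliteKVStoreConfig (with a db_path field) defined elsewhere in this change set; the database path below is hypothetical.

import asyncio

from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
from llama_stack.providers.utils.kvstore.kvstore import kvstore_impl


async def demo() -> None:
    # Hypothetical on-disk location; any writable path works for this sketch.
    store = await kvstore_impl(SqliteKVStoreConfig(db_path="/tmp/demo_kvstore.db"))

    await store.set("session:001", "alpha")
    await store.set("session:002", "beta")

    # Lexicographic range scan over the "session:" prefix.
    values = await store.values_in_range("session:", "session:999")  # the stored values
    keys = await store.keys_in_range("session:", "session:999")      # the matching keys
    print(keys, values)


asyncio.run(demo())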
diff --git a/llama_stack/providers/utils/kvstore/config.py b/llama_stack/providers/utils/kvstore/config.py index 4f85982be..bbb0c5c0a 100644 --- a/llama_stack/providers/utils/kvstore/config.py +++ b/llama_stack/providers/utils/kvstore/config.py @@ -6,10 +6,9 @@ import re from enum import Enum -from typing import Literal, Optional, Union +from typing import Annotated, Literal from pydantic import BaseModel, Field, field_validator -from typing_extensions import Annotated from llama_stack.distribution.utils.config_dirs import RUNTIME_BASE_DIR @@ -22,7 +21,7 @@ class KVStoreType(Enum): class CommonConfig(BaseModel): - namespace: Optional[str] = Field( + namespace: str | None = Field( default=None, description="All keys will be prefixed with this namespace", ) @@ -66,10 +65,10 @@ class SqliteKVStoreConfig(CommonConfig): class PostgresKVStoreConfig(CommonConfig): type: Literal[KVStoreType.postgres.value] = KVStoreType.postgres.value host: str = "localhost" - port: int = 5432 + port: str = "5432" db: str = "llamastack" user: str - password: Optional[str] = None + password: str | None = None table_name: str = "llamastack_kvstore" @classmethod @@ -108,7 +107,7 @@ class MongoDBKVStoreConfig(CommonConfig): port: int = 27017 db: str = "llamastack" user: str = None - password: Optional[str] = None + password: str | None = None collection_name: str = "llamastack_kvstore" @classmethod @@ -126,6 +125,6 @@ class MongoDBKVStoreConfig(CommonConfig): KVStoreConfig = Annotated[ - Union[RedisKVStoreConfig, SqliteKVStoreConfig, PostgresKVStoreConfig, MongoDBKVStoreConfig], + RedisKVStoreConfig | SqliteKVStoreConfig | PostgresKVStoreConfig | MongoDBKVStoreConfig, Field(discriminator="type", default=KVStoreType.sqlite.value), ] diff --git a/llama_stack/providers/utils/kvstore/kvstore.py b/llama_stack/providers/utils/kvstore/kvstore.py index 6bc175260..3a1ee8a26 100644 --- a/llama_stack/providers/utils/kvstore/kvstore.py +++ b/llama_stack/providers/utils/kvstore/kvstore.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from typing import List, Optional from .api import KVStore from .config import KVStoreConfig, KVStoreType @@ -21,15 +20,22 @@ class InmemoryKVStoreImpl(KVStore): async def initialize(self) -> None: pass - async def get(self, key: str) -> Optional[str]: + async def get(self, key: str) -> str | None: return self._store.get(key) async def set(self, key: str, value: str) -> None: self._store[key] = value - async def range(self, start_key: str, end_key: str) -> List[str]: + async def values_in_range(self, start_key: str, end_key: str) -> list[str]: return [self._store[key] for key in self._store.keys() if key >= start_key and key < end_key] + async def keys_in_range(self, start_key: str, end_key: str) -> list[str]: + """Get all keys in the given range.""" + return [key for key in self._store.keys() if key >= start_key and key < end_key] + + async def delete(self, key: str) -> None: + del self._store[key] + async def kvstore_impl(config: KVStoreConfig) -> KVStore: if config.type == KVStoreType.redis.value: diff --git a/llama_stack/providers/utils/kvstore/mongodb/mongodb.py b/llama_stack/providers/utils/kvstore/mongodb/mongodb.py index c1581dc8d..3842773d9 100644 --- a/llama_stack/providers/utils/kvstore/mongodb/mongodb.py +++ b/llama_stack/providers/utils/kvstore/mongodb/mongodb.py @@ -6,7 +6,6 @@ import logging from datetime import datetime -from typing import List, Optional from pymongo import AsyncMongoClient @@ -43,12 +42,12 @@ class MongoDBKVStoreImpl(KVStore): return key return f"{self.config.namespace}:{key}" - async def set(self, key: str, value: str, expiration: Optional[datetime] = None) -> None: + async def set(self, key: str, value: str, expiration: datetime | None = None) -> None: key = self._namespaced_key(key) update_query = {"$set": {"value": value, "expiration": expiration}} await self.collection.update_one({"key": key}, update_query, upsert=True) - async def get(self, key: str) -> Optional[str]: + async def get(self, key: str) -> str | None: key = self._namespaced_key(key) query = {"key": key} result = await self.collection.find_one(query, {"value": 1, "_id": 0}) @@ -58,7 +57,7 @@ class MongoDBKVStoreImpl(KVStore): key = self._namespaced_key(key) await self.collection.delete_one({"key": key}) - async def range(self, start_key: str, end_key: str) -> List[str]: + async def values_in_range(self, start_key: str, end_key: str) -> list[str]: start_key = self._namespaced_key(start_key) end_key = self._namespaced_key(end_key) query = { @@ -69,3 +68,10 @@ class MongoDBKVStoreImpl(KVStore): async for doc in cursor: result.append(doc["value"]) return result + + async def keys_in_range(self, start_key: str, end_key: str) -> list[str]: + start_key = self._namespaced_key(start_key) + end_key = self._namespaced_key(end_key) + query = {"key": {"$gte": start_key, "$lt": end_key}} + cursor = self.collection.find(query, {"key": 1, "_id": 0}).sort("key", 1) + return [doc["key"] async for doc in cursor] diff --git a/llama_stack/providers/utils/kvstore/postgres/postgres.py b/llama_stack/providers/utils/kvstore/postgres/postgres.py index 097d36066..bd35decfc 100644 --- a/llama_stack/providers/utils/kvstore/postgres/postgres.py +++ b/llama_stack/providers/utils/kvstore/postgres/postgres.py @@ -6,7 +6,6 @@ import logging from datetime import datetime -from typing import List, Optional import psycopg2 from psycopg2.extras import DictCursor @@ -54,7 +53,7 @@ class PostgresKVStoreImpl(KVStore): return key return f"{self.config.namespace}:{key}" - async def set(self, key: str, value: str, expiration:
Optional[datetime] = None) -> None: + async def set(self, key: str, value: str, expiration: datetime | None = None) -> None: key = self._namespaced_key(key) self.cursor.execute( f""" @@ -66,7 +65,7 @@ class PostgresKVStoreImpl(KVStore): (key, value, expiration), ) - async def get(self, key: str) -> Optional[str]: + async def get(self, key: str) -> str | None: key = self._namespaced_key(key) self.cursor.execute( f""" @@ -86,7 +85,7 @@ class PostgresKVStoreImpl(KVStore): (key,), ) - async def range(self, start_key: str, end_key: str) -> List[str]: + async def values_in_range(self, start_key: str, end_key: str) -> list[str]: start_key = self._namespaced_key(start_key) end_key = self._namespaced_key(end_key) @@ -100,3 +99,13 @@ class PostgresKVStoreImpl(KVStore): (start_key, end_key), ) return [row[0] for row in self.cursor.fetchall()] + + async def keys_in_range(self, start_key: str, end_key: str) -> list[str]: + start_key = self._namespaced_key(start_key) + end_key = self._namespaced_key(end_key) + + self.cursor.execute( + f"SELECT key FROM {self.config.table_name} WHERE key >= %s AND key < %s", + (start_key, end_key), + ) + return [row[0] for row in self.cursor.fetchall()] diff --git a/llama_stack/providers/utils/kvstore/redis/redis.py b/llama_stack/providers/utils/kvstore/redis/redis.py index a390ea866..3d2d956c3 100644 --- a/llama_stack/providers/utils/kvstore/redis/redis.py +++ b/llama_stack/providers/utils/kvstore/redis/redis.py @@ -5,7 +5,6 @@ # the root directory of this source tree. from datetime import datetime -from typing import List, Optional from redis.asyncio import Redis @@ -25,13 +24,13 @@ class RedisKVStoreImpl(KVStore): return key return f"{self.config.namespace}:{key}" - async def set(self, key: str, value: str, expiration: Optional[datetime] = None) -> None: + async def set(self, key: str, value: str, expiration: datetime | None = None) -> None: key = self._namespaced_key(key) await self.redis.set(key, value) if expiration: await self.redis.expireat(key, expiration) - async def get(self, key: str) -> Optional[str]: + async def get(self, key: str) -> str | None: key = self._namespaced_key(key) value = await self.redis.get(key) if value is None: @@ -43,7 +42,7 @@ class RedisKVStoreImpl(KVStore): key = self._namespaced_key(key) await self.redis.delete(key) - async def range(self, start_key: str, end_key: str) -> List[str]: + async def values_in_range(self, start_key: str, end_key: str) -> list[str]: start_key = self._namespaced_key(start_key) end_key = self._namespaced_key(end_key) cursor = 0 @@ -68,3 +67,10 @@ class RedisKVStoreImpl(KVStore): ] return [] + + async def keys_in_range(self, start_key: str, end_key: str) -> list[str]: + """Get all keys in the given range.""" + matching_keys = await self.redis.zrangebylex(self.namespace, f"[{start_key}", f"[{end_key}") + if not matching_keys: + return [] + return [k.decode("utf-8") for k in matching_keys] diff --git a/llama_stack/providers/utils/kvstore/sqlite/sqlite.py b/llama_stack/providers/utils/kvstore/sqlite/sqlite.py index bc0488aac..4e49e4d8c 100644 --- a/llama_stack/providers/utils/kvstore/sqlite/sqlite.py +++ b/llama_stack/providers/utils/kvstore/sqlite/sqlite.py @@ -6,7 +6,6 @@ import os from datetime import datetime -from typing import List, Optional import aiosqlite @@ -33,7 +32,7 @@ class SqliteKVStoreImpl(KVStore): ) await db.commit() - async def set(self, key: str, value: str, expiration: Optional[datetime] = None) -> None: + async def set(self, key: str, value: str, expiration: datetime | None = None) -> None: 
async with aiosqlite.connect(self.db_path) as db: await db.execute( f"INSERT OR REPLACE INTO {self.table_name} (key, value, expiration) VALUES (?, ?, ?)", @@ -41,7 +40,7 @@ class SqliteKVStoreImpl(KVStore): ) await db.commit() - async def get(self, key: str) -> Optional[str]: + async def get(self, key: str) -> str | None: async with aiosqlite.connect(self.db_path) as db: async with db.execute(f"SELECT value, expiration FROM {self.table_name} WHERE key = ?", (key,)) as cursor: row = await cursor.fetchone() @@ -55,7 +54,7 @@ class SqliteKVStoreImpl(KVStore): await db.execute(f"DELETE FROM {self.table_name} WHERE key = ?", (key,)) await db.commit() - async def range(self, start_key: str, end_key: str) -> List[str]: + async def values_in_range(self, start_key: str, end_key: str) -> list[str]: async with aiosqlite.connect(self.db_path) as db: async with db.execute( f"SELECT key, value, expiration FROM {self.table_name} WHERE key >= ? AND key <= ?", @@ -66,3 +65,13 @@ class SqliteKVStoreImpl(KVStore): _, value, _ = row result.append(value) return result + + async def keys_in_range(self, start_key: str, end_key: str) -> list[str]: + """Get all keys in the given range.""" + async with aiosqlite.connect(self.db_path) as db: + cursor = await db.execute( + f"SELECT key FROM {self.table_name} WHERE key >= ? AND key <= ?", + (start_key, end_key), + ) + rows = await cursor.fetchall() + return [row[0] for row in rows] diff --git a/llama_stack/providers/utils/memory/vector_store.py b/llama_stack/providers/utils/memory/vector_store.py index ba4403ea1..4cd15860b 100644 --- a/llama_stack/providers/utils/memory/vector_store.py +++ b/llama_stack/providers/utils/memory/vector_store.py @@ -9,7 +9,7 @@ import logging import re from abc import ABC, abstractmethod from dataclasses import dataclass -from typing import Any, Dict, List, Optional +from typing import Any from urllib.parse import unquote import httpx @@ -94,7 +94,7 @@ def content_from_data(data_url: str) -> str: return "" -def concat_interleaved_content(content: List[InterleavedContent]) -> InterleavedContent: +def concat_interleaved_content(content: list[InterleavedContent]) -> InterleavedContent: """concatenate interleaved content into a single list. 
ensure that 'str's are converted to TextContentItem when in a list""" ret = [] @@ -118,58 +118,86 @@ async def content_from_doc(doc: RAGDocument) -> str: if isinstance(doc.content, URL): if doc.content.uri.startswith("data:"): return content_from_data(doc.content.uri) - else: - async with httpx.AsyncClient() as client: - r = await client.get(doc.content.uri) - if doc.mime_type == "application/pdf": - return parse_pdf(r.content) - else: - return r.text - - pattern = re.compile("^(https?://|file://|data:)") - if pattern.match(doc.content): - if doc.content.startswith("data:"): - return content_from_data(doc.content) - else: + async with httpx.AsyncClient() as client: + r = await client.get(doc.content.uri) + if doc.mime_type == "application/pdf": + return parse_pdf(r.content) + return r.text + elif isinstance(doc.content, str): + pattern = re.compile("^(https?://|file://|data:)") + if pattern.match(doc.content): + if doc.content.startswith("data:"): + return content_from_data(doc.content) async with httpx.AsyncClient() as client: r = await client.get(doc.content) if doc.mime_type == "application/pdf": return parse_pdf(r.content) - else: - return r.text - - return interleaved_content_as_str(doc.content) + return r.text + return doc.content + else: + # will raise ValueError if the content is not List[InterleavedContent] or InterleavedContent + return interleaved_content_as_str(doc.content) -def make_overlapped_chunks(document_id: str, text: str, window_len: int, overlap_len: int) -> List[Chunk]: +def make_overlapped_chunks( + document_id: str, text: str, window_len: int, overlap_len: int, metadata: dict[str, Any] +) -> list[Chunk]: tokenizer = Tokenizer.get_instance() tokens = tokenizer.encode(text, bos=False, eos=False) + try: + metadata_string = str(metadata) + except Exception as e: + raise ValueError("Failed to serialize metadata to string") from e + + metadata_tokens = tokenizer.encode(metadata_string, bos=False, eos=False) chunks = [] for i in range(0, len(tokens), window_len - overlap_len): toks = tokens[i : i + window_len] chunk = tokenizer.decode(toks) + chunk_metadata = metadata.copy() + chunk_metadata["document_id"] = document_id + chunk_metadata["token_count"] = len(toks) + chunk_metadata["metadata_token_count"] = len(metadata_tokens) + # chunk is a string chunks.append( Chunk( content=chunk, - metadata={ - "token_count": len(toks), - "document_id": document_id, - }, + metadata=chunk_metadata, ) ) return chunks +def _validate_embedding(embedding: NDArray, index: int, expected_dimension: int): + """Helper method to validate embedding format and dimensions""" + if not isinstance(embedding, (list | np.ndarray)): + raise ValueError(f"Embedding at index {index} must be a list or numpy array, got {type(embedding)}") + + if isinstance(embedding, np.ndarray): + if not np.issubdtype(embedding.dtype, np.number): + raise ValueError(f"Embedding at index {index} contains non-numeric values") + else: + if not all(isinstance(e, (float | int | np.number)) for e in embedding): + raise ValueError(f"Embedding at index {index} contains non-numeric values") + + if len(embedding) != expected_dimension: + raise ValueError(f"Embedding at index {index} has dimension {len(embedding)}, expected {expected_dimension}") + + class EmbeddingIndex(ABC): @abstractmethod - async def add_chunks(self, chunks: List[Chunk], embeddings: NDArray): + async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray): raise NotImplementedError() @abstractmethod - async def query(self, embedding: NDArray, k: int, 
score_threshold: float) -> QueryChunksResponse: + async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse: + raise NotImplementedError() + + @abstractmethod + async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse: raise NotImplementedError() @abstractmethod @@ -185,26 +213,40 @@ class VectorDBWithIndex: async def insert_chunks( self, - chunks: List[Chunk], + chunks: list[Chunk], ) -> None: - embeddings_response = await self.inference_api.embeddings( - self.vector_db.embedding_model, [x.content for x in chunks] - ) - embeddings = np.array(embeddings_response.embeddings) + chunks_to_embed = [] + for i, c in enumerate(chunks): + if c.embedding is None: + chunks_to_embed.append(c) + else: + _validate_embedding(c.embedding, i, self.vector_db.embedding_dimension) + if chunks_to_embed: + resp = await self.inference_api.embeddings( + self.vector_db.embedding_model, + [c.content for c in chunks_to_embed], + ) + for c, embedding in zip(chunks_to_embed, resp.embeddings, strict=False): + c.embedding = embedding + + embeddings = np.array([c.embedding for c in chunks], dtype=np.float32) await self.index.add_chunks(chunks, embeddings) async def query_chunks( self, query: InterleavedContent, - params: Optional[Dict[str, Any]] = None, + params: dict[str, Any] | None = None, ) -> QueryChunksResponse: if params is None: params = {} k = params.get("max_chunks", 3) + mode = params.get("mode") score_threshold = params.get("score_threshold", 0.0) - - query_str = interleaved_content_as_str(query) - embeddings_response = await self.inference_api.embeddings(self.vector_db.embedding_model, [query_str]) - query_vector = np.array(embeddings_response.embeddings[0], dtype=np.float32) - return await self.index.query(query_vector, k, score_threshold) + query_string = interleaved_content_as_str(query) + if mode == "keyword": + return await self.index.query_keyword(query_string, k, score_threshold) + else: + embeddings_response = await self.inference_api.embeddings(self.vector_db.embedding_model, [query_string]) + query_vector = np.array(embeddings_response.embeddings[0], dtype=np.float32) + return await self.index.query_vector(query_vector, k, score_threshold) diff --git a/llama_stack/providers/utils/datasetio/pagination.py b/llama_stack/providers/utils/pagination.py similarity index 95% rename from llama_stack/providers/utils/datasetio/pagination.py rename to llama_stack/providers/utils/pagination.py index 1b693f8f5..033022491 100644 --- a/llama_stack/providers/utils/datasetio/pagination.py +++ b/llama_stack/providers/utils/pagination.py @@ -4,13 +4,13 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict, List +from typing import Any from llama_stack.apis.common.responses import PaginatedResponse def paginate_records( - records: List[Dict[str, Any]], + records: list[dict[str, Any]], start_index: int | None = None, limit: int | None = None, ) -> PaginatedResponse: diff --git a/llama_stack/providers/utils/responses/responses_store.py b/llama_stack/providers/utils/responses/responses_store.py new file mode 100644 index 000000000..15354e3e2 --- /dev/null +++ b/llama_stack/providers/utils/responses/responses_store.py @@ -0,0 +1,135 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. 
+# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. +from llama_stack.apis.agents import ( + Order, +) +from llama_stack.apis.agents.openai_responses import ( + ListOpenAIResponseInputItem, + ListOpenAIResponseObject, + OpenAIResponseInput, + OpenAIResponseObject, + OpenAIResponseObjectWithInput, +) +from llama_stack.distribution.utils.config_dirs import RUNTIME_BASE_DIR + +from ..sqlstore.api import ColumnDefinition, ColumnType +from ..sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig, sqlstore_impl + + +class ResponsesStore: + def __init__(self, sql_store_config: SqlStoreConfig): + if not sql_store_config: + sql_store_config = SqliteSqlStoreConfig( + db_path=(RUNTIME_BASE_DIR / "sqlstore.db").as_posix(), + ) + self.sql_store = sqlstore_impl(sql_store_config) + + async def initialize(self): + """Create the necessary tables if they don't exist.""" + await self.sql_store.create_table( + "openai_responses", + { + "id": ColumnDefinition(type=ColumnType.STRING, primary_key=True), + "created_at": ColumnType.INTEGER, + "response_object": ColumnType.JSON, + "model": ColumnType.STRING, + }, + ) + + async def store_response_object( + self, response_object: OpenAIResponseObject, input: list[OpenAIResponseInput] + ) -> None: + data = response_object.model_dump() + data["input"] = [input_item.model_dump() for input_item in input] + + await self.sql_store.insert( + "openai_responses", + { + "id": data["id"], + "created_at": data["created_at"], + "model": data["model"], + "response_object": data, + }, + ) + + async def list_responses( + self, + after: str | None = None, + limit: int | None = 50, + model: str | None = None, + order: Order | None = Order.desc, + ) -> ListOpenAIResponseObject: + """ + List responses from the database. + + :param after: The ID of the last response to return. + :param limit: The maximum number of responses to return. + :param model: The model to filter by. + :param order: The order to sort the responses by. + """ + # TODO: support after + if after: + raise NotImplementedError("After is not supported for SQLite") + if not order: + order = Order.desc + + rows = await self.sql_store.fetch_all( + "openai_responses", + where={"model": model} if model else None, + order_by=[("created_at", order.value)], + limit=limit, + ) + + data = [OpenAIResponseObjectWithInput(**row["response_object"]) for row in rows] + return ListOpenAIResponseObject( + data=data, + # TODO: implement has_more + has_more=False, + first_id=data[0].id if data else "", + last_id=data[-1].id if data else "", + ) + + async def get_response_object(self, response_id: str) -> OpenAIResponseObjectWithInput: + row = await self.sql_store.fetch_one("openai_responses", where={"id": response_id}) + if not row: + raise ValueError(f"Response with id {response_id} not found") from None + return OpenAIResponseObjectWithInput(**row["response_object"]) + + async def list_response_input_items( + self, + response_id: str, + after: str | None = None, + before: str | None = None, + include: list[str] | None = None, + limit: int | None = 20, + order: Order | None = Order.desc, + ) -> ListOpenAIResponseInputItem: + """ + List input items for a given response. + + :param response_id: The ID of the response to retrieve input items for. + :param after: An item ID to list items after, used for pagination. + :param before: An item ID to list items before, used for pagination. + :param include: Additional fields to include in the response. 
+ :param limit: A limit on the number of objects to be returned. + :param order: The order to return the input items in. + """ + # TODO: support after/before pagination + if after or before: + raise NotImplementedError("After/before pagination is not supported yet") + if include: + raise NotImplementedError("Include is not supported yet") + + response_with_input = await self.get_response_object(response_id) + input_items = response_with_input.input + + if order == Order.desc: + input_items = list(reversed(input_items)) + + if limit is not None and len(input_items) > limit: + input_items = input_items[:limit] + + return ListOpenAIResponseInputItem(data=input_items) diff --git a/llama_stack/providers/utils/scheduler.py b/llama_stack/providers/utils/scheduler.py new file mode 100644 index 000000000..845ab1f02 --- /dev/null +++ b/llama_stack/providers/utils/scheduler.py @@ -0,0 +1,266 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import abc +import asyncio +import functools +import threading +from collections.abc import Callable, Coroutine, Iterable +from datetime import datetime, timezone +from enum import Enum +from typing import Any, TypeAlias + +from pydantic import BaseModel + +from llama_stack.log import get_logger + +logger = get_logger(name=__name__, category="scheduler") + + +# TODO: revisit the list of possible statuses when defining a more coherent +# Jobs API for all API flows; e.g. do we need new vs scheduled? +class JobStatus(Enum): + new = "new" + scheduled = "scheduled" + running = "running" + failed = "failed" + completed = "completed" + + +JobID: TypeAlias = str +JobType: TypeAlias = str + + +class JobArtifact(BaseModel): + type: JobType + name: str + # TODO: uri should be a reference to /files API; revisit when /files is implemented + uri: str | None = None + metadata: dict[str, Any] + + +JobHandler = Callable[ + [Callable[[str], None], Callable[[JobStatus], None], Callable[[JobArtifact], None]], Coroutine[Any, Any, None] +] + + +LogMessage: TypeAlias = tuple[datetime, str] + + +_COMPLETED_STATUSES = {JobStatus.completed, JobStatus.failed} + + +class Job: + def __init__(self, job_type: JobType, job_id: JobID, handler: JobHandler): + super().__init__() + self.id = job_id + self._type = job_type + self._handler = handler + self._artifacts: list[JobArtifact] = [] + self._logs: list[LogMessage] = [] + self._state_transitions: list[tuple[datetime, JobStatus]] = [(datetime.now(timezone.utc), JobStatus.new)] + + @property + def handler(self) -> JobHandler: + return self._handler + + @property + def status(self) -> JobStatus: + return self._state_transitions[-1][1] + + @status.setter + def status(self, status: JobStatus): + if status in _COMPLETED_STATUSES and self.status in _COMPLETED_STATUSES: + raise ValueError(f"Job is already in a completed state ({self.status})") + if self.status == status: + return + self._state_transitions.append((datetime.now(timezone.utc), status)) + + @property + def artifacts(self) -> list[JobArtifact]: + return self._artifacts + + def register_artifact(self, artifact: JobArtifact) -> None: + self._artifacts.append(artifact) + + def _find_state_transition_date(self, status: Iterable[JobStatus]) -> datetime | None: + for date, s in reversed(self._state_transitions): + if s in status: + return date + return None + + @property + def scheduled_at(self) -> datetime | None: + return 
self._find_state_transition_date([JobStatus.scheduled]) + + @property + def started_at(self) -> datetime | None: + return self._find_state_transition_date([JobStatus.running]) + + @property + def completed_at(self) -> datetime | None: + return self._find_state_transition_date(_COMPLETED_STATUSES) + + @property + def logs(self) -> list[LogMessage]: + return self._logs[:] + + def append_log(self, message: LogMessage) -> None: + self._logs.append(message) + + # TODO: implement + def cancel(self) -> None: + raise NotImplementedError + + +class _SchedulerBackend(abc.ABC): + @abc.abstractmethod + def on_log_message_cb(self, job: Job, message: LogMessage) -> None: + raise NotImplementedError + + @abc.abstractmethod + def on_status_change_cb(self, job: Job, status: JobStatus) -> None: + raise NotImplementedError + + @abc.abstractmethod + def on_artifact_collected_cb(self, job: Job, artifact: JobArtifact) -> None: + raise NotImplementedError + + @abc.abstractmethod + async def shutdown(self) -> None: + raise NotImplementedError + + @abc.abstractmethod + def schedule( + self, + job: Job, + on_log_message_cb: Callable[[str], None], + on_status_change_cb: Callable[[JobStatus], None], + on_artifact_collected_cb: Callable[[JobArtifact], None], + ) -> None: + raise NotImplementedError + + +class _NaiveSchedulerBackend(_SchedulerBackend): + def __init__(self, timeout: int = 5): + self._timeout = timeout + self._loop = asyncio.new_event_loop() + # There may be performance implications of using threads due to Python + # GIL; may need to measure if it's a real problem though + self._thread = threading.Thread(target=self._run_loop, daemon=True) + self._thread.start() + + def _run_loop(self) -> None: + asyncio.set_event_loop(self._loop) + self._loop.run_forever() + + # When stopping the loop, give tasks a chance to finish + # TODO: should we explicitly inform jobs of pending stoppage? 
+ for task in asyncio.all_tasks(self._loop): + self._loop.run_until_complete(task) + self._loop.close() + + async def shutdown(self) -> None: + self._loop.call_soon_threadsafe(self._loop.stop) + self._thread.join() + + # TODO: decouple scheduling and running the job + def schedule( + self, + job: Job, + on_log_message_cb: Callable[[str], None], + on_status_change_cb: Callable[[JobStatus], None], + on_artifact_collected_cb: Callable[[JobArtifact], None], + ) -> None: + async def do(): + try: + job.status = JobStatus.running + await job.handler(on_log_message_cb, on_status_change_cb, on_artifact_collected_cb) + except Exception as e: + on_log_message_cb(str(e)) + job.status = JobStatus.failed + logger.exception(f"Job {job.id} failed.") + + asyncio.run_coroutine_threadsafe(do(), self._loop) + + def on_log_message_cb(self, job: Job, message: LogMessage) -> None: + pass + + def on_status_change_cb(self, job: Job, status: JobStatus) -> None: + pass + + def on_artifact_collected_cb(self, job: Job, artifact: JobArtifact) -> None: + pass + + +_BACKENDS = { + "naive": _NaiveSchedulerBackend, +} + + +def _get_backend_impl(backend: str) -> _SchedulerBackend: + try: + return _BACKENDS[backend]() + except KeyError as e: + raise ValueError(f"Unknown backend {backend}") from e + + +class Scheduler: + def __init__(self, backend: str = "naive"): + # TODO: if server crashes, job states are lost; we need to persist jobs on disc + self._jobs: dict[JobID, Job] = {} + self._backend = _get_backend_impl(backend) + + def _on_log_message_cb(self, job: Job, message: str) -> None: + msg = (datetime.now(timezone.utc), message) + # At least for the time being, until there's a better way to expose + # logs to users, log messages on console + logger.info(f"Job {job.id}: {message}") + job.append_log(msg) + self._backend.on_log_message_cb(job, msg) + + def _on_status_change_cb(self, job: Job, status: JobStatus) -> None: + job.status = status + self._backend.on_status_change_cb(job, status) + + def _on_artifact_collected_cb(self, job: Job, artifact: JobArtifact) -> None: + job.register_artifact(artifact) + self._backend.on_artifact_collected_cb(job, artifact) + + def schedule(self, type_: JobType, job_id: JobID, handler: JobHandler) -> JobID: + job = Job(type_, job_id, handler) + if job.id in self._jobs: + raise ValueError(f"Job {job.id} already exists") + + self._jobs[job.id] = job + job.status = JobStatus.scheduled + self._backend.schedule( + job, + functools.partial(self._on_log_message_cb, job), + functools.partial(self._on_status_change_cb, job), + functools.partial(self._on_artifact_collected_cb, job), + ) + + return job.id + + def cancel(self, job_id: JobID) -> None: + self.get_job(job_id).cancel() + + def get_job(self, job_id: JobID) -> Job: + try: + return self._jobs[job_id] + except KeyError as e: + raise ValueError(f"Job {job_id} not found") from e + + def get_jobs(self, type_: JobType | None = None) -> list[Job]: + jobs = list(self._jobs.values()) + if type_: + jobs = [job for job in jobs if job._type == type_] + return jobs + + async def shutdown(self): + # TODO: also cancel jobs once implemented + await self._backend.shutdown() diff --git a/llama_stack/providers/utils/scoring/aggregation_utils.py b/llama_stack/providers/utils/scoring/aggregation_utils.py index 7254c9433..cff9a112f 100644 --- a/llama_stack/providers/utils/scoring/aggregation_utils.py +++ b/llama_stack/providers/utils/scoring/aggregation_utils.py @@ -4,13 +4,13 @@ # This source code is licensed under the terms described in the LICENSE file in # 
the root directory of this source tree. import statistics -from typing import Any, Dict, List +from typing import Any from llama_stack.apis.scoring import ScoringResultRow from llama_stack.apis.scoring_functions import AggregationFunctionType -def aggregate_accuracy(scoring_results: List[ScoringResultRow]) -> Dict[str, Any]: +def aggregate_accuracy(scoring_results: list[ScoringResultRow]) -> dict[str, Any]: num_correct = sum(result["score"] for result in scoring_results) avg_score = num_correct / len(scoring_results) @@ -21,14 +21,14 @@ def aggregate_accuracy(scoring_results: List[ScoringResultRow]) -> Dict[str, Any } -def aggregate_average(scoring_results: List[ScoringResultRow]) -> Dict[str, Any]: +def aggregate_average(scoring_results: list[ScoringResultRow]) -> dict[str, Any]: return { "average": sum(result["score"] for result in scoring_results if result["score"] is not None) / len([_ for _ in scoring_results if _["score"] is not None]), } -def aggregate_weighted_average(scoring_results: List[ScoringResultRow]) -> Dict[str, Any]: +def aggregate_weighted_average(scoring_results: list[ScoringResultRow]) -> dict[str, Any]: return { "weighted_average": sum( result["score"] * result["weight"] @@ -40,14 +40,14 @@ def aggregate_weighted_average(scoring_results: List[ScoringResultRow]) -> Dict[ def aggregate_categorical_count( - scoring_results: List[ScoringResultRow], -) -> Dict[str, Any]: + scoring_results: list[ScoringResultRow], +) -> dict[str, Any]: scores = [str(r["score"]) for r in scoring_results] unique_scores = sorted(set(scores)) return {"categorical_count": {s: scores.count(s) for s in unique_scores}} -def aggregate_median(scoring_results: List[ScoringResultRow]) -> Dict[str, Any]: +def aggregate_median(scoring_results: list[ScoringResultRow]) -> dict[str, Any]: scores = [r["score"] for r in scoring_results if r["score"] is not None] median = statistics.median(scores) if scores else None return {"median": median} @@ -64,8 +64,8 @@ AGGREGATION_FUNCTIONS = { def aggregate_metrics( - scoring_results: List[ScoringResultRow], metrics: List[AggregationFunctionType] -) -> Dict[str, Any]: + scoring_results: list[ScoringResultRow], metrics: list[AggregationFunctionType] +) -> dict[str, Any]: agg_results = {} for metric in metrics: if metric not in AGGREGATION_FUNCTIONS: diff --git a/llama_stack/providers/utils/scoring/base_scoring_fn.py b/llama_stack/providers/utils/scoring/base_scoring_fn.py index 834deb7e1..2fae177b7 100644 --- a/llama_stack/providers/utils/scoring/base_scoring_fn.py +++ b/llama_stack/providers/utils/scoring/base_scoring_fn.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
from abc import ABC, abstractmethod -from typing import Any, Dict, List, Optional +from typing import Any from llama_stack.apis.scoring import ScoringFnParams, ScoringResultRow from llama_stack.apis.scoring_functions import ScoringFn @@ -28,28 +28,28 @@ class BaseScoringFn(ABC): @abstractmethod async def score_row( self, - input_row: Dict[str, Any], - scoring_fn_identifier: Optional[str] = None, - scoring_params: Optional[ScoringFnParams] = None, + input_row: dict[str, Any], + scoring_fn_identifier: str | None = None, + scoring_params: ScoringFnParams | None = None, ) -> ScoringResultRow: raise NotImplementedError() @abstractmethod async def aggregate( self, - scoring_results: List[ScoringResultRow], - scoring_fn_identifier: Optional[str] = None, - scoring_params: Optional[ScoringFnParams] = None, - ) -> Dict[str, Any]: + scoring_results: list[ScoringResultRow], + scoring_fn_identifier: str | None = None, + scoring_params: ScoringFnParams | None = None, + ) -> dict[str, Any]: raise NotImplementedError() @abstractmethod async def score( self, - input_rows: List[Dict[str, Any]], - scoring_fn_identifier: Optional[str] = None, - scoring_params: Optional[ScoringFnParams] = None, - ) -> List[ScoringResultRow]: + input_rows: list[dict[str, Any]], + scoring_fn_identifier: str | None = None, + scoring_params: ScoringFnParams | None = None, + ) -> list[ScoringResultRow]: raise NotImplementedError() @@ -65,7 +65,7 @@ class RegisteredBaseScoringFn(BaseScoringFn): def __str__(self) -> str: return self.__class__.__name__ - def get_supported_scoring_fn_defs(self) -> List[ScoringFn]: + def get_supported_scoring_fn_defs(self) -> list[ScoringFn]: return list(self.supported_fn_defs_registry.values()) def register_scoring_fn_def(self, scoring_fn: ScoringFn) -> None: @@ -81,18 +81,18 @@ class RegisteredBaseScoringFn(BaseScoringFn): @abstractmethod async def score_row( self, - input_row: Dict[str, Any], - scoring_fn_identifier: Optional[str] = None, - scoring_params: Optional[ScoringFnParams] = None, + input_row: dict[str, Any], + scoring_fn_identifier: str | None = None, + scoring_params: ScoringFnParams | None = None, ) -> ScoringResultRow: raise NotImplementedError() async def aggregate( self, - scoring_results: List[ScoringResultRow], - scoring_fn_identifier: Optional[str] = None, - scoring_params: Optional[ScoringFnParams] = None, - ) -> Dict[str, Any]: + scoring_results: list[ScoringResultRow], + scoring_fn_identifier: str | None = None, + scoring_params: ScoringFnParams | None = None, + ) -> dict[str, Any]: params = self.supported_fn_defs_registry[scoring_fn_identifier].params if scoring_params is not None: if params is None: @@ -107,8 +107,8 @@ class RegisteredBaseScoringFn(BaseScoringFn): async def score( self, - input_rows: List[Dict[str, Any]], - scoring_fn_identifier: Optional[str] = None, - scoring_params: Optional[ScoringFnParams] = None, - ) -> List[ScoringResultRow]: + input_rows: list[dict[str, Any]], + scoring_fn_identifier: str | None = None, + scoring_params: ScoringFnParams | None = None, + ) -> list[ScoringResultRow]: return [await self.score_row(input_row, scoring_fn_identifier, scoring_params) for input_row in input_rows] diff --git a/llama_stack/providers/utils/scoring/basic_scoring_utils.py b/llama_stack/providers/utils/scoring/basic_scoring_utils.py index 91abfdb2e..7372a521c 100644 --- a/llama_stack/providers/utils/scoring/basic_scoring_utils.py +++ b/llama_stack/providers/utils/scoring/basic_scoring_utils.py @@ -5,8 +5,8 @@ # the root directory of this source tree. 
import contextlib import signal +from collections.abc import Iterator from types import FrameType -from typing import Iterator, Optional class TimeoutError(Exception): @@ -15,7 +15,7 @@ class TimeoutError(Exception): @contextlib.contextmanager def time_limit(seconds: float) -> Iterator[None]: - def signal_handler(signum: int, frame: Optional[FrameType]) -> None: + def signal_handler(signum: int, frame: FrameType | None) -> None: raise TimeoutError("Timed out!") signal.setitimer(signal.ITIMER_REAL, seconds) diff --git a/llama_stack/providers/utils/sqlstore/api.py b/llama_stack/providers/utils/sqlstore/api.py new file mode 100644 index 000000000..ace40e4c4 --- /dev/null +++ b/llama_stack/providers/utils/sqlstore/api.py @@ -0,0 +1,90 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from collections.abc import Mapping +from enum import Enum +from typing import Any, Literal, Protocol + +from pydantic import BaseModel + + +class ColumnType(Enum): + INTEGER = "INTEGER" + STRING = "STRING" + TEXT = "TEXT" + FLOAT = "FLOAT" + BOOLEAN = "BOOLEAN" + JSON = "JSON" + DATETIME = "DATETIME" + + +class ColumnDefinition(BaseModel): + type: ColumnType + primary_key: bool = False + nullable: bool = True + default: Any = None + + +class SqlStore(Protocol): + """ + A protocol for a SQL store. + """ + + async def create_table(self, table: str, schema: Mapping[str, ColumnType | ColumnDefinition]) -> None: + """ + Create a table. + """ + pass + + async def insert(self, table: str, data: Mapping[str, Any]) -> None: + """ + Insert a row into a table. + """ + pass + + async def fetch_all( + self, + table: str, + where: Mapping[str, Any] | None = None, + limit: int | None = None, + order_by: list[tuple[str, Literal["asc", "desc"]]] | None = None, + ) -> list[dict[str, Any]]: + """ + Fetch all rows from a table. + """ + pass + + async def fetch_one( + self, + table: str, + where: Mapping[str, Any] | None = None, + order_by: list[tuple[str, Literal["asc", "desc"]]] | None = None, + ) -> dict[str, Any] | None: + """ + Fetch one row from a table. + """ + pass + + async def update( + self, + table: str, + data: Mapping[str, Any], + where: Mapping[str, Any], + ) -> None: + """ + Update a row in a table. + """ + pass + + async def delete( + self, + table: str, + where: Mapping[str, Any], + ) -> None: + """ + Delete a row from a table. + """ + pass diff --git a/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py b/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py new file mode 100644 index 000000000..825220679 --- /dev/null +++ b/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py @@ -0,0 +1,163 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+from collections.abc import Mapping +from typing import Any, Literal + +from sqlalchemy import ( + JSON, + Boolean, + Column, + DateTime, + Float, + Integer, + MetaData, + String, + Table, + Text, + select, +) +from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine + +from .api import ColumnDefinition, ColumnType, SqlStore +from .sqlstore import SqlAlchemySqlStoreConfig + +TYPE_MAPPING: dict[ColumnType, Any] = { + ColumnType.INTEGER: Integer, + ColumnType.STRING: String, + ColumnType.FLOAT: Float, + ColumnType.BOOLEAN: Boolean, + ColumnType.DATETIME: DateTime, + ColumnType.TEXT: Text, + ColumnType.JSON: JSON, +} + + +class SqlAlchemySqlStoreImpl(SqlStore): + def __init__(self, config: SqlAlchemySqlStoreConfig): + self.config = config + self.async_session = async_sessionmaker(create_async_engine(config.engine_str)) + self.metadata = MetaData() + + async def create_table( + self, + table: str, + schema: Mapping[str, ColumnType | ColumnDefinition], + ) -> None: + if not schema: + raise ValueError(f"No columns defined for table '{table}'.") + + sqlalchemy_columns: list[Column] = [] + + for col_name, col_props in schema.items(): + col_type = None + is_primary_key = False + is_nullable = True # Default to nullable + + if isinstance(col_props, ColumnType): + col_type = col_props + elif isinstance(col_props, ColumnDefinition): + col_type = col_props.type + is_primary_key = col_props.primary_key + is_nullable = col_props.nullable + + sqlalchemy_type = TYPE_MAPPING.get(col_type) + if not sqlalchemy_type: + raise ValueError(f"Unsupported column type '{col_type}' for column '{col_name}'.") + + sqlalchemy_columns.append( + Column(col_name, sqlalchemy_type, primary_key=is_primary_key, nullable=is_nullable) + ) + + # Check if table already exists in metadata, otherwise define it + if table not in self.metadata.tables: + sqlalchemy_table = Table(table, self.metadata, *sqlalchemy_columns) + else: + sqlalchemy_table = self.metadata.tables[table] + + # Create the table in the database if it doesn't exist + # checkfirst=True ensures it doesn't try to recreate if it's already there + engine = create_async_engine(self.config.engine_str) + async with engine.begin() as conn: + await conn.run_sync(self.metadata.create_all, tables=[sqlalchemy_table], checkfirst=True) + + async def insert(self, table: str, data: Mapping[str, Any]) -> None: + async with self.async_session() as session: + await session.execute(self.metadata.tables[table].insert(), data) + await session.commit() + + async def fetch_all( + self, + table: str, + where: Mapping[str, Any] | None = None, + limit: int | None = None, + order_by: list[tuple[str, Literal["asc", "desc"]]] | None = None, + ) -> list[dict[str, Any]]: + async with self.async_session() as session: + query = select(self.metadata.tables[table]) + if where: + for key, value in where.items(): + query = query.where(self.metadata.tables[table].c[key] == value) + if limit: + query = query.limit(limit) + if order_by: + if not isinstance(order_by, list): + raise ValueError( + f"order_by must be a list of tuples (column, order={['asc', 'desc']}), got {order_by}" + ) + for order in order_by: + if not isinstance(order, tuple): + raise ValueError( + f"order_by must be a list of tuples (column, order={['asc', 'desc']}), got {order_by}" + ) + name, order_type = order + if order_type == "asc": + query = query.order_by(self.metadata.tables[table].c[name].asc()) + elif order_type == "desc": + query = query.order_by(self.metadata.tables[table].c[name].desc()) + else: + raise 
ValueError(f"Invalid order '{order_type}' for column '{name}'") + result = await session.execute(query) + if result.rowcount == 0: + return [] + return [dict(row._mapping) for row in result] + + async def fetch_one( + self, + table: str, + where: Mapping[str, Any] | None = None, + order_by: list[tuple[str, Literal["asc", "desc"]]] | None = None, + ) -> dict[str, Any] | None: + rows = await self.fetch_all(table, where, limit=1, order_by=order_by) + if not rows: + return None + return rows[0] + + async def update( + self, + table: str, + data: Mapping[str, Any], + where: Mapping[str, Any], + ) -> None: + if not where: + raise ValueError("where is required for update") + + async with self.async_session() as session: + stmt = self.metadata.tables[table].update() + for key, value in where.items(): + stmt = stmt.where(self.metadata.tables[table].c[key] == value) + await session.execute(stmt, data) + await session.commit() + + async def delete(self, table: str, where: Mapping[str, Any]) -> None: + if not where: + raise ValueError("where is required for delete") + + async with self.async_session() as session: + stmt = self.metadata.tables[table].delete() + for key, value in where.items(): + stmt = stmt.where(self.metadata.tables[table].c[key] == value) + await session.execute(stmt) + await session.commit() diff --git a/llama_stack/providers/utils/sqlstore/sqlstore.py b/llama_stack/providers/utils/sqlstore/sqlstore.py new file mode 100644 index 000000000..3091e8f96 --- /dev/null +++ b/llama_stack/providers/utils/sqlstore/sqlstore.py @@ -0,0 +1,90 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + + +from abc import abstractmethod +from enum import Enum +from pathlib import Path +from typing import Annotated, Literal + +from pydantic import BaseModel, Field + +from llama_stack.distribution.utils.config_dirs import RUNTIME_BASE_DIR + +from .api import SqlStore + + +class SqlStoreType(Enum): + sqlite = "sqlite" + postgres = "postgres" + + +class SqlAlchemySqlStoreConfig(BaseModel): + @property + @abstractmethod + def engine_str(self) -> str: ... + + # TODO: move this when we have a better way to specify dependencies with internal APIs + @property + def pip_packages(self) -> list[str]: + return ["sqlalchemy[asyncio]"] + + +class SqliteSqlStoreConfig(SqlAlchemySqlStoreConfig): + type: Literal["sqlite"] = SqlStoreType.sqlite.value + db_path: str = Field( + default=(RUNTIME_BASE_DIR / "sqlstore.db").as_posix(), + description="Database path, e.g. 
~/.llama/distributions/ollama/sqlstore.db", + ) + + @property + def engine_str(self) -> str: + return "sqlite+aiosqlite:///" + Path(self.db_path).expanduser().as_posix() + + @classmethod + def sample_run_config(cls, __distro_dir__: str, db_name: str = "sqlstore.db"): + return cls( + type="sqlite", + db_path="${env.SQLITE_STORE_DIR:" + __distro_dir__ + "}/" + db_name, + ) + + @property + def pip_packages(self) -> list[str]: + return super().pip_packages + ["aiosqlite"] + + +class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig): + type: Literal["postgres"] = SqlStoreType.postgres.value + host: str = "localhost" + port: str = "5432" + db: str = "llamastack" + user: str + password: str | None = None + + @property + def engine_str(self) -> str: + return f"postgresql+asyncpg://{self.user}:{self.password}@{self.host}:{self.port}/{self.db}" + + @property + def pip_packages(self) -> list[str]: + return super().pip_packages + ["asyncpg"] + + +SqlStoreConfig = Annotated[ + SqliteSqlStoreConfig | PostgresSqlStoreConfig, + Field(discriminator="type", default=SqlStoreType.sqlite.value), +] + + +def sqlstore_impl(config: SqlStoreConfig) -> SqlStore: + if config.type in [SqlStoreType.sqlite.value, SqlStoreType.postgres.value]: + from .sqlalchemy_sqlstore import SqlAlchemySqlStoreImpl + + impl = SqlAlchemySqlStoreImpl(config) + else: + raise ValueError(f"Unknown sqlstore type {config.type}") + + return impl diff --git a/llama_stack/providers/utils/telemetry/dataset_mixin.py b/llama_stack/providers/utils/telemetry/dataset_mixin.py index 34c612133..fe729a244 100644 --- a/llama_stack/providers/utils/telemetry/dataset_mixin.py +++ b/llama_stack/providers/utils/telemetry/dataset_mixin.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from typing import List, Optional from llama_stack.apis.datasetio import DatasetIO from llama_stack.apis.telemetry import QueryCondition, QuerySpansResponse, Span @@ -17,10 +16,10 @@ class TelemetryDatasetMixin: async def save_spans_to_dataset( self, - attribute_filters: List[QueryCondition], - attributes_to_save: List[str], + attribute_filters: list[QueryCondition], + attributes_to_save: list[str], dataset_id: str, - max_depth: Optional[int] = None, + max_depth: int | None = None, ) -> None: if self.datasetio_api is None: raise RuntimeError("DatasetIO API not available") @@ -48,9 +47,9 @@ class TelemetryDatasetMixin: async def query_spans( self, - attribute_filters: List[QueryCondition], - attributes_to_return: List[str], - max_depth: Optional[int] = None, + attribute_filters: list[QueryCondition], + attributes_to_return: list[str], + max_depth: int | None = None, ) -> QuerySpansResponse: traces = await self.query_traces(attribute_filters=attribute_filters) spans = [] diff --git a/llama_stack/providers/utils/telemetry/sqlite_trace_store.py b/llama_stack/providers/utils/telemetry/sqlite_trace_store.py index 3248f3fa7..af1145fe7 100644 --- a/llama_stack/providers/utils/telemetry/sqlite_trace_store.py +++ b/llama_stack/providers/utils/telemetry/sqlite_trace_store.py @@ -6,7 +6,7 @@ import json from datetime import datetime -from typing import Dict, List, Optional, Protocol +from typing import Protocol import aiosqlite @@ -16,18 +16,18 @@ from llama_stack.apis.telemetry import QueryCondition, Span, SpanWithStatus, Tra class TraceStore(Protocol): async def query_traces( self, - attribute_filters: Optional[List[QueryCondition]] = None, - limit: Optional[int] = 100, - offset: Optional[int] = 0, - order_by: Optional[List[str]] = None, - ) -> List[Trace]: ... + attribute_filters: list[QueryCondition] | None = None, + limit: int | None = 100, + offset: int | None = 0, + order_by: list[str] | None = None, + ) -> list[Trace]: ... async def get_span_tree( self, span_id: str, - attributes_to_return: Optional[List[str]] = None, - max_depth: Optional[int] = None, - ) -> Dict[str, SpanWithStatus]: ... + attributes_to_return: list[str] | None = None, + max_depth: int | None = None, + ) -> dict[str, SpanWithStatus]: ... 
class SQLiteTraceStore(TraceStore): @@ -36,11 +36,11 @@ class SQLiteTraceStore(TraceStore): async def query_traces( self, - attribute_filters: Optional[List[QueryCondition]] = None, - limit: Optional[int] = 100, - offset: Optional[int] = 0, - order_by: Optional[List[str]] = None, - ) -> List[Trace]: + attribute_filters: list[QueryCondition] | None = None, + limit: int | None = 100, + offset: int | None = 0, + order_by: list[str] | None = None, + ) -> list[Trace]: def build_where_clause() -> tuple[str, list]: if not attribute_filters: return "", [] @@ -112,9 +112,9 @@ class SQLiteTraceStore(TraceStore): async def get_span_tree( self, span_id: str, - attributes_to_return: Optional[List[str]] = None, - max_depth: Optional[int] = None, - ) -> Dict[str, SpanWithStatus]: + attributes_to_return: list[str] | None = None, + max_depth: int | None = None, + ) -> dict[str, SpanWithStatus]: # Build the attributes selection attributes_select = "s.attributes" if attributes_to_return: diff --git a/llama_stack/providers/utils/telemetry/trace_protocol.py b/llama_stack/providers/utils/telemetry/trace_protocol.py index 525ade74d..eb6d8b331 100644 --- a/llama_stack/providers/utils/telemetry/trace_protocol.py +++ b/llama_stack/providers/utils/telemetry/trace_protocol.py @@ -7,8 +7,9 @@ import asyncio import inspect import json +from collections.abc import AsyncGenerator, Callable from functools import wraps -from typing import Any, AsyncGenerator, Callable, Type, TypeVar +from typing import Any, TypeVar from pydantic import BaseModel @@ -25,13 +26,13 @@ def _prepare_for_json(value: Any) -> str: """Serialize a single value into JSON-compatible format.""" if value is None: return "" - elif isinstance(value, (str, int, float, bool)): + elif isinstance(value, str | int | float | bool): return value elif hasattr(value, "_name_"): return value._name_ elif isinstance(value, BaseModel): return json.loads(value.model_dump_json()) - elif isinstance(value, (list, tuple, set)): + elif isinstance(value, list | tuple | set): return [_prepare_for_json(item) for item in value] elif isinstance(value, dict): return {str(k): _prepare_for_json(v) for k, v in value.items()} @@ -43,7 +44,7 @@ def _prepare_for_json(value: Any) -> str: return str(value) -def trace_protocol(cls: Type[T]) -> Type[T]: +def trace_protocol(cls: type[T]) -> type[T]: """ A class decorator that automatically traces all methods in a protocol/base class and its inheriting classes. 
diff --git a/llama_stack/providers/utils/telemetry/tracing.py b/llama_stack/providers/utils/telemetry/tracing.py index 3d5c717d6..4edfa6516 100644 --- a/llama_stack/providers/utils/telemetry/tracing.py +++ b/llama_stack/providers/utils/telemetry/tracing.py @@ -10,9 +10,10 @@ import logging import queue import random import threading +from collections.abc import Callable from datetime import datetime, timezone from functools import wraps -from typing import Any, Callable, Dict, List, Optional +from typing import Any from llama_stack.apis.telemetry import ( LogSeverity, @@ -33,6 +34,8 @@ logger = get_logger(__name__, category="core") INVALID_SPAN_ID = 0x0000000000000000 INVALID_TRACE_ID = 0x00000000000000000000000000000000 +ROOT_SPAN_MARKERS = ["__root__", "__root_span__"] + def trace_id_to_str(trace_id: int) -> str: """Convenience trace ID formatting method @@ -106,13 +109,13 @@ class BackgroundLogger: class TraceContext: - spans: List[Span] = [] + spans: list[Span] = [] def __init__(self, logger: BackgroundLogger, trace_id: str): self.logger = logger self.trace_id = trace_id - def push_span(self, name: str, attributes: Dict[str, Any] = None) -> Span: + def push_span(self, name: str, attributes: dict[str, Any] = None) -> Span: current_span = self.get_current_span() span = Span( span_id=generate_span_id(), @@ -168,7 +171,7 @@ def setup_logger(api: Telemetry, level: int = logging.INFO): root_logger.addHandler(TelemetryHandler()) -async def start_trace(name: str, attributes: Dict[str, Any] = None) -> TraceContext: +async def start_trace(name: str, attributes: dict[str, Any] = None) -> TraceContext: global CURRENT_TRACE_CONTEXT, BACKGROUND_LOGGER if BACKGROUND_LOGGER is None: @@ -177,7 +180,8 @@ async def start_trace(name: str, attributes: Dict[str, Any] = None) -> TraceCont trace_id = generate_trace_id() context = TraceContext(BACKGROUND_LOGGER, trace_id) - context.push_span(name, {"__root__": True, **(attributes or {})}) + attributes = {marker: True for marker in ROOT_SPAN_MARKERS} | (attributes or {}) + context.push_span(name, attributes) CURRENT_TRACE_CONTEXT.set(context) return context @@ -246,7 +250,7 @@ class TelemetryHandler(logging.Handler): class SpanContextManager: - def __init__(self, name: str, attributes: Dict[str, Any] = None): + def __init__(self, name: str, attributes: dict[str, Any] = None): self.name = name self.attributes = attributes self.span = None @@ -316,11 +320,11 @@ class SpanContextManager: return wrapper -def span(name: str, attributes: Dict[str, Any] = None): +def span(name: str, attributes: dict[str, Any] = None): return SpanContextManager(name, attributes) -def get_current_span() -> Optional[Span]: +def get_current_span() -> Span | None: global CURRENT_TRACE_CONTEXT if CURRENT_TRACE_CONTEXT is None: logger.debug("No trace context to get current span") diff --git a/llama_stack/providers/utils/tools/mcp.py b/llama_stack/providers/utils/tools/mcp.py new file mode 100644 index 000000000..f024693a0 --- /dev/null +++ b/llama_stack/providers/utils/tools/mcp.py @@ -0,0 +1,100 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from contextlib import asynccontextmanager +from typing import Any + +try: + # for python < 3.11 + import exceptiongroup + + BaseExceptionGroup = exceptiongroup.BaseExceptionGroup +except ImportError: + pass + +import httpx +from mcp import ClientSession +from mcp import types as mcp_types +from mcp.client.sse import sse_client + +from llama_stack.apis.common.content_types import ImageContentItem, InterleavedContentItem, TextContentItem +from llama_stack.apis.tools import ( + ListToolDefsResponse, + ToolDef, + ToolInvocationResult, + ToolParameter, +) +from llama_stack.distribution.datatypes import AuthenticationRequiredError +from llama_stack.log import get_logger + +logger = get_logger(__name__, category="tools") + + +@asynccontextmanager +async def sse_client_wrapper(endpoint: str, headers: dict[str, str]): + try: + async with sse_client(endpoint, headers=headers) as streams: + async with ClientSession(*streams) as session: + await session.initialize() + yield session + except BaseException as e: + if isinstance(e, BaseExceptionGroup): + for exc in e.exceptions: + if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code == 401: + raise AuthenticationRequiredError(exc) from exc + elif isinstance(e, httpx.HTTPStatusError) and e.response.status_code == 401: + raise AuthenticationRequiredError(e) from e + + raise + + +async def list_mcp_tools(endpoint: str, headers: dict[str, str]) -> ListToolDefsResponse: + tools = [] + async with sse_client_wrapper(endpoint, headers) as session: + tools_result = await session.list_tools() + for tool in tools_result.tools: + parameters = [] + for param_name, param_schema in tool.inputSchema.get("properties", {}).items(): + parameters.append( + ToolParameter( + name=param_name, + parameter_type=param_schema.get("type", "string"), + description=param_schema.get("description", ""), + ) + ) + tools.append( + ToolDef( + name=tool.name, + description=tool.description, + parameters=parameters, + metadata={ + "endpoint": endpoint, + }, + ) + ) + return ListToolDefsResponse(data=tools) + + +async def invoke_mcp_tool( + endpoint: str, headers: dict[str, str], tool_name: str, kwargs: dict[str, Any] +) -> ToolInvocationResult: + async with sse_client_wrapper(endpoint, headers) as session: + result = await session.call_tool(tool_name, kwargs) + + content: list[InterleavedContentItem] = [] + for item in result.content: + if isinstance(item, mcp_types.TextContent): + content.append(TextContentItem(text=item.text)) + elif isinstance(item, mcp_types.ImageContent): + content.append(ImageContentItem(image=item.data)) + elif isinstance(item, mcp_types.EmbeddedResource): + logger.warning(f"EmbeddedResource is not supported: {item}") + else: + raise ValueError(f"Unknown content type: {type(item)}") + return ToolInvocationResult( + content=content, + error_code=1 if result.isError else 0, + ) diff --git a/llama_stack/schema_utils.py b/llama_stack/schema_utils.py index 8143f1224..694de333e 100644 --- a/llama_stack/schema_utils.py +++ b/llama_stack/schema_utils.py @@ -4,37 +4,38 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
+from collections.abc import Callable from dataclasses import dataclass -from typing import Any, Callable, List, Optional, TypeVar +from typing import Any, TypeVar from .strong_typing.schema import json_schema_type, register_schema # noqa: F401 @dataclass class WebMethod: - route: Optional[str] = None + route: str | None = None public: bool = False - request_examples: Optional[List[Any]] = None - response_examples: Optional[List[Any]] = None - method: Optional[str] = None - raw_bytes_request_body: Optional[bool] = False + request_examples: list[Any] | None = None + response_examples: list[Any] | None = None + method: str | None = None + raw_bytes_request_body: bool | None = False # A descriptive name of the corresponding span created by tracing - descriptive_name: Optional[str] = None - experimental: Optional[bool] = False + descriptive_name: str | None = None + experimental: bool | None = False T = TypeVar("T", bound=Callable[..., Any]) def webmethod( - route: Optional[str] = None, - method: Optional[str] = None, - public: Optional[bool] = False, - request_examples: Optional[List[Any]] = None, - response_examples: Optional[List[Any]] = None, - raw_bytes_request_body: Optional[bool] = False, - descriptive_name: Optional[str] = None, - experimental: Optional[bool] = False, + route: str | None = None, + method: str | None = None, + public: bool | None = False, + request_examples: list[Any] | None = None, + response_examples: list[Any] | None = None, + raw_bytes_request_body: bool | None = False, + descriptive_name: str | None = None, + experimental: bool | None = False, ) -> Callable[[T], T]: """ Decorator that supplies additional metadata to an endpoint operation function. diff --git a/llama_stack/strong_typing/docstring.py b/llama_stack/strong_typing/docstring.py index b038d1024..497c9ea82 100644 --- a/llama_stack/strong_typing/docstring.py +++ b/llama_stack/strong_typing/docstring.py @@ -11,6 +11,7 @@ Type-safe data interchange for Python data classes. """ import builtins +import collections.abc import dataclasses import inspect import re @@ -171,6 +172,13 @@ class SupportsDoc(Protocol): __doc__: Optional[str] +def _maybe_unwrap_async_iterator(t): + origin_type = typing.get_origin(t) + if origin_type is collections.abc.AsyncIterator: + return typing.get_args(t)[0] + return t + + def parse_type(typ: SupportsDoc) -> Docstring: """ Parse the docstring of a type into its components. @@ -178,6 +186,8 @@ def parse_type(typ: SupportsDoc) -> Docstring: :param typ: The type whose documentation string to parse. :returns: Components of the documentation string. """ + # Use docstring from the iterator origin type for streaming apis + typ = _maybe_unwrap_async_iterator(typ) doc = get_docstring(typ) if doc is None: diff --git a/llama_stack/strong_typing/schema.py b/llama_stack/strong_typing/schema.py index 0f5121906..82baddc86 100644 --- a/llama_stack/strong_typing/schema.py +++ b/llama_stack/strong_typing/schema.py @@ -10,6 +10,7 @@ Type-safe data interchange for Python data classes. 
:see: https://github.com/hunyadi/strong_typing """ +import collections.abc import dataclasses import datetime import decimal @@ -478,6 +479,8 @@ class JsonSchemaGenerator: } return ret elif origin_type is Literal: + if len(typing.get_args(typ)) != 1: + raise ValueError(f"Literal type {typ} has {len(typing.get_args(typ))} arguments") (literal_value,) = typing.get_args(typ) # unpack value of literal type schema = self.type_to_schema(type(literal_value)) schema["const"] = literal_value @@ -485,6 +488,9 @@ class JsonSchemaGenerator: elif origin_type is type: (concrete_type,) = typing.get_args(typ) # unpack single tuple element return {"const": self.type_to_schema(concrete_type, force_expand=True)} + elif origin_type is collections.abc.AsyncIterator: + (concrete_type,) = typing.get_args(typ) + return self.type_to_schema(concrete_type) # dictionary of class attributes members = dict(inspect.getmembers(typ, lambda a: not inspect.isroutine(a))) diff --git a/llama_stack/templates/bedrock/bedrock.py b/llama_stack/templates/bedrock/bedrock.py index f82defb4b..bc3a9304f 100644 --- a/llama_stack/templates/bedrock/bedrock.py +++ b/llama_stack/templates/bedrock/bedrock.py @@ -29,7 +29,6 @@ def get_distribution_template() -> DistributionTemplate: "tool_runtime": [ "remote::brave-search", "remote::tavily-search", - "inline::code-interpreter", "inline::rag-runtime", "remote::model-context-protocol", ], @@ -55,10 +54,6 @@ def get_distribution_template() -> DistributionTemplate: toolgroup_id="builtin::rag", provider_id="rag-runtime", ), - ToolGroupInput( - toolgroup_id="builtin::code_interpreter", - provider_id="code-interpreter", - ), ] return DistributionTemplate( diff --git a/llama_stack/templates/bedrock/build.yaml b/llama_stack/templates/bedrock/build.yaml index 6c07b0478..97a06f77a 100644 --- a/llama_stack/templates/bedrock/build.yaml +++ b/llama_stack/templates/bedrock/build.yaml @@ -26,7 +26,9 @@ distribution_spec: tool_runtime: - remote::brave-search - remote::tavily-search - - inline::code-interpreter - inline::rag-runtime - remote::model-context-protocol image_type: conda +additional_pip_packages: +- aiosqlite +- sqlalchemy[asyncio] diff --git a/llama_stack/templates/bedrock/run.yaml b/llama_stack/templates/bedrock/run.yaml index fe21d4bef..a58068a60 100644 --- a/llama_stack/templates/bedrock/run.yaml +++ b/llama_stack/templates/bedrock/run.yaml @@ -35,13 +35,16 @@ providers: type: sqlite namespace: null db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/bedrock/trace_store.db} + sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -87,9 +90,6 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:} max_results: 3 - - provider_id: code-interpreter - provider_type: inline::code-interpreter - config: {} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} @@ -99,6 +99,9 @@ providers: metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/registry.db +inference_store: + type: sqlite + db_path: 
${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/inference_store.db models: - metadata: {} model_id: meta.llama3-1-8b-instruct-v1:0 @@ -140,7 +143,5 @@ tool_groups: provider_id: tavily-search - toolgroup_id: builtin::rag provider_id: rag-runtime -- toolgroup_id: builtin::code_interpreter - provider_id: code-interpreter server: port: 8321 diff --git a/llama_stack/templates/cerebras/build.yaml b/llama_stack/templates/cerebras/build.yaml index ef6c43212..f26f4ed9b 100644 --- a/llama_stack/templates/cerebras/build.yaml +++ b/llama_stack/templates/cerebras/build.yaml @@ -27,6 +27,8 @@ distribution_spec: tool_runtime: - remote::brave-search - remote::tavily-search - - inline::code-interpreter - inline::rag-runtime image_type: conda +additional_pip_packages: +- aiosqlite +- sqlalchemy[asyncio] diff --git a/llama_stack/templates/cerebras/cerebras.py b/llama_stack/templates/cerebras/cerebras.py index c370fb7d0..d891502d8 100644 --- a/llama_stack/templates/cerebras/cerebras.py +++ b/llama_stack/templates/cerebras/cerebras.py @@ -34,7 +34,6 @@ def get_distribution_template() -> DistributionTemplate: "tool_runtime": [ "remote::brave-search", "remote::tavily-search", - "inline::code-interpreter", "inline::rag-runtime", ], } @@ -77,10 +76,6 @@ def get_distribution_template() -> DistributionTemplate: toolgroup_id="builtin::rag", provider_id="rag-runtime", ), - ToolGroupInput( - toolgroup_id="builtin::code_interpreter", - provider_id="code-interpreter", - ), ] return DistributionTemplate( diff --git a/llama_stack/templates/cerebras/doc_template.md b/llama_stack/templates/cerebras/doc_template.md index 76f8c34ad..5cae2b2da 100644 --- a/llama_stack/templates/cerebras/doc_template.md +++ b/llama_stack/templates/cerebras/doc_template.md @@ -46,7 +46,7 @@ docker run \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ./run.yaml:/root/my-run.yaml \ llamastack/distribution-{{ name }} \ - --yaml-config /root/my-run.yaml \ + --config /root/my-run.yaml \ --port $LLAMA_STACK_PORT \ --env CEREBRAS_API_KEY=$CEREBRAS_API_KEY ``` diff --git a/llama_stack/templates/cerebras/report.md b/llama_stack/templates/cerebras/report.md deleted file mode 100644 index 7c09474b1..000000000 --- a/llama_stack/templates/cerebras/report.md +++ /dev/null @@ -1,44 +0,0 @@ -# Report for cerebras distribution - -## Supported Models -| Model Descriptor | cerebras | -|:---|:---| -| meta-llama/Llama-3-8B-Instruct | ❌ | -| meta-llama/Llama-3-70B-Instruct | ❌ | -| meta-llama/Llama-3.1-8B-Instruct | ✅ | -| meta-llama/Llama-3.1-70B-Instruct | ❌ | -| meta-llama/Llama-3.1-405B-Instruct-FP8 | ❌ | -| meta-llama/Llama-3.2-1B-Instruct | ❌ | -| meta-llama/Llama-3.2-3B-Instruct | ❌ | -| meta-llama/Llama-3.2-11B-Vision-Instruct | ❌ | -| meta-llama/Llama-3.2-90B-Vision-Instruct | ❌ | -| meta-llama/Llama-3.3-70B-Instruct | ✅ | -| meta-llama/Llama-Guard-3-11B-Vision | ❌ | -| meta-llama/Llama-Guard-3-1B | ❌ | -| meta-llama/Llama-Guard-3-8B | ❌ | -| meta-llama/Llama-Guard-2-8B | ❌ | - -## Inference -| Model | API | Capability | Test | Status | -|:----- |:-----|:-----|:-----|:-----| -| Llama-3.1-8B-Instruct | /chat_completion | streaming | test_text_chat_completion_streaming | ✅ | -| Llama-3.2-11B-Vision-Instruct | /chat_completion | streaming | test_image_chat_completion_streaming | ❌ | -| Llama-3.2-11B-Vision-Instruct | /chat_completion | non_streaming | test_image_chat_completion_non_streaming | ❌ | -| Llama-3.1-8B-Instruct | /chat_completion | non_streaming | test_text_chat_completion_non_streaming | ✅ | -| Llama-3.1-8B-Instruct | /chat_completion | 
tool_calling | test_text_chat_completion_with_tool_calling_and_streaming | ✅ | -| Llama-3.1-8B-Instruct | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_non_streaming | ✅ | -| Llama-3.1-8B-Instruct | /completion | streaming | test_text_completion_streaming | ✅ | -| Llama-3.1-8B-Instruct | /completion | non_streaming | test_text_completion_non_streaming | ✅ | -| Llama-3.1-8B-Instruct | /completion | structured_output | test_text_completion_structured_output | ❌ | - -## Vector IO -| API | Capability | Test | Status | -|:-----|:-----|:-----|:-----| -| /retrieve | | test_vector_db_retrieve | ✅ | - -## Agents -| API | Capability | Test | Status | -|:-----|:-----|:-----|:-----| -| /create_agent_turn | rag | test_rag_agent | ✅ | -| /create_agent_turn | custom_tool | test_custom_tool | ❌ | -| /create_agent_turn | code_execution | test_code_interpreter_for_attachments | ✅ | diff --git a/llama_stack/templates/cerebras/run.yaml b/llama_stack/templates/cerebras/run.yaml index dc7ee4729..c080536b7 100644 --- a/llama_stack/templates/cerebras/run.yaml +++ b/llama_stack/templates/cerebras/run.yaml @@ -41,6 +41,9 @@ providers: type: sqlite namespace: null db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/responses_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -79,9 +82,9 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/cerebras/trace_store.db} + sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/trace_store.db tool_runtime: - provider_id: brave-search provider_type: remote::brave-search @@ -93,15 +96,15 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:} max_results: 3 - - provider_id: code-interpreter - provider_type: inline::code-interpreter - config: {} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/inference_store.db models: - metadata: {} model_id: llama3.1-8b @@ -138,7 +141,5 @@ tool_groups: provider_id: tavily-search - toolgroup_id: builtin::rag provider_id: rag-runtime -- toolgroup_id: builtin::code_interpreter - provider_id: code-interpreter server: port: 8321 diff --git a/llama_stack/templates/ci-tests/build.yaml b/llama_stack/templates/ci-tests/build.yaml index a5c615f2f..9f4fbbdda 100644 --- a/llama_stack/templates/ci-tests/build.yaml +++ b/llama_stack/templates/ci-tests/build.yaml @@ -27,7 +27,9 @@ distribution_spec: tool_runtime: - remote::brave-search - remote::tavily-search - - inline::code-interpreter - inline::rag-runtime - remote::model-context-protocol image_type: conda +additional_pip_packages: +- aiosqlite +- sqlalchemy[asyncio] diff --git a/llama_stack/templates/ci-tests/ci_tests.py b/llama_stack/templates/ci-tests/ci_tests.py index f6e836918..afa8a23ce 100644 --- a/llama_stack/templates/ci-tests/ci_tests.py +++ b/llama_stack/templates/ci-tests/ci_tests.py @@ -40,7 +40,6 @@ def get_distribution_template() -> DistributionTemplate: "tool_runtime": [ "remote::brave-search", 
"remote::tavily-search", - "inline::code-interpreter", "inline::rag-runtime", "remote::model-context-protocol", ], @@ -71,10 +70,6 @@ def get_distribution_template() -> DistributionTemplate: toolgroup_id="builtin::rag", provider_id="rag-runtime", ), - ToolGroupInput( - toolgroup_id="builtin::code_interpreter", - provider_id="code-interpreter", - ), ] available_models = { "fireworks": MODEL_ENTRIES, diff --git a/llama_stack/templates/ci-tests/run.yaml b/llama_stack/templates/ci-tests/run.yaml index 3c16dd5ea..368187d3a 100644 --- a/llama_stack/templates/ci-tests/run.yaml +++ b/llama_stack/templates/ci-tests/run.yaml @@ -38,13 +38,16 @@ providers: type: sqlite namespace: null db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/ci-tests/trace_store.db} + sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -90,9 +93,6 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:} max_results: 3 - - provider_id: code-interpreter - provider_type: inline::code-interpreter - config: {} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} @@ -102,6 +102,9 @@ providers: metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/inference_store.db models: - metadata: {} model_id: accounts/fireworks/models/llama-v3p1-8b-instruct @@ -236,7 +239,5 @@ tool_groups: provider_id: tavily-search - toolgroup_id: builtin::rag provider_id: rag-runtime -- toolgroup_id: builtin::code_interpreter - provider_id: code-interpreter server: port: 8321 diff --git a/llama_stack/templates/dell/build.yaml b/llama_stack/templates/dell/build.yaml index 05b98d56f..513df16c1 100644 --- a/llama_stack/templates/dell/build.yaml +++ b/llama_stack/templates/dell/build.yaml @@ -28,6 +28,8 @@ distribution_spec: tool_runtime: - remote::brave-search - remote::tavily-search - - inline::code-interpreter - inline::rag-runtime image_type: conda +additional_pip_packages: +- aiosqlite +- sqlalchemy[asyncio] diff --git a/llama_stack/templates/dell/dell.py b/llama_stack/templates/dell/dell.py index 52c5a5476..a7ec5f3b8 100644 --- a/llama_stack/templates/dell/dell.py +++ b/llama_stack/templates/dell/dell.py @@ -30,7 +30,6 @@ def get_distribution_template() -> DistributionTemplate: "tool_runtime": [ "remote::brave-search", "remote::tavily-search", - "inline::code-interpreter", "inline::rag-runtime", ], } @@ -87,10 +86,6 @@ def get_distribution_template() -> DistributionTemplate: toolgroup_id="builtin::rag", provider_id="rag-runtime", ), - ToolGroupInput( - toolgroup_id="builtin::code_interpreter", - provider_id="code-interpreter", - ), ] return DistributionTemplate( diff --git a/llama_stack/templates/dell/doc_template.md b/llama_stack/templates/dell/doc_template.md index 26f07130b..6bdd7f81c 100644 --- a/llama_stack/templates/dell/doc_template.md +++ b/llama_stack/templates/dell/doc_template.md @@ -143,7 +143,7 @@ docker run \ -v 
$HOME/.llama:/root/.llama \ -v ./llama_stack/templates/tgi/run-with-safety.yaml:/root/my-run.yaml \ llamastack/distribution-{{ name }} \ - --yaml-config /root/my-run.yaml \ + --config /root/my-run.yaml \ --port $LLAMA_STACK_PORT \ --env INFERENCE_MODEL=$INFERENCE_MODEL \ --env DEH_URL=$DEH_URL \ diff --git a/llama_stack/templates/dell/run-with-safety.yaml b/llama_stack/templates/dell/run-with-safety.yaml index 802c56aad..5c6072245 100644 --- a/llama_stack/templates/dell/run-with-safety.yaml +++ b/llama_stack/templates/dell/run-with-safety.yaml @@ -41,13 +41,16 @@ providers: type: sqlite namespace: null db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/dell/trace_store.db} + sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -93,15 +96,15 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:} max_results: 3 - - provider_id: code-interpreter - provider_type: inline::code-interpreter - config: {} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} @@ -127,7 +130,5 @@ tool_groups: provider_id: brave-search - toolgroup_id: builtin::rag provider_id: rag-runtime -- toolgroup_id: builtin::code_interpreter - provider_id: code-interpreter server: port: 8321 diff --git a/llama_stack/templates/dell/run.yaml b/llama_stack/templates/dell/run.yaml index 4a2d819a9..ffaa0bf2f 100644 --- a/llama_stack/templates/dell/run.yaml +++ b/llama_stack/templates/dell/run.yaml @@ -37,13 +37,16 @@ providers: type: sqlite namespace: null db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/dell/trace_store.db} + sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -89,15 +92,15 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:} max_results: 3 - - provider_id: code-interpreter - provider_type: inline::code-interpreter - config: {} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} @@ -118,7 +121,5 @@ tool_groups: provider_id: brave-search - toolgroup_id: builtin::rag 
provider_id: rag-runtime -- toolgroup_id: builtin::code_interpreter - provider_id: code-interpreter server: port: 8321 diff --git a/llama_stack/templates/dependencies.json b/llama_stack/templates/dependencies.json index b96191752..47a35edc0 100644 --- a/llama_stack/templates/dependencies.json +++ b/llama_stack/templates/dependencies.json @@ -31,6 +31,7 @@ "scikit-learn", "scipy", "sentencepiece", + "sqlalchemy[asyncio]", "tqdm", "transformers", "tree_sitter", @@ -67,6 +68,7 @@ "scikit-learn", "scipy", "sentencepiece", + "sqlalchemy[asyncio]", "tqdm", "transformers", "tree_sitter", @@ -105,6 +107,7 @@ "scikit-learn", "scipy", "sentencepiece", + "sqlalchemy[asyncio]", "sqlite-vec", "tqdm", "transformers", @@ -145,46 +148,7 @@ "scikit-learn", "scipy", "sentencepiece", - "tqdm", - "transformers", - "tree_sitter", - "uvicorn", - "sentence-transformers --no-deps", - "torch torchvision --index-url https://download.pytorch.org/whl/cpu" - ], - "dev": [ - "aiosqlite", - "autoevals", - "blobfile", - "chardet", - "chromadb-client", - "datasets", - "emoji", - "fastapi", - "fire", - "fireworks-ai", - "httpx", - "langdetect", - "litellm", - "matplotlib", - "mcp", - "nltk", - "numpy", - "openai", - "opentelemetry-exporter-otlp-proto-http", - "opentelemetry-sdk", - "pandas", - "pillow", - "psycopg2-binary", - "pymongo", - "pypdf", - "pythainlp", - "redis", - "requests", - "scikit-learn", - "scipy", - "sentencepiece", - "sqlite-vec", + "sqlalchemy[asyncio]", "tqdm", "transformers", "tree_sitter", @@ -224,6 +188,7 @@ "scikit-learn", "scipy", "sentencepiece", + "sqlalchemy[asyncio]", "tqdm", "transformers", "tree_sitter", @@ -261,6 +226,7 @@ "scikit-learn", "scipy", "sentencepiece", + "sqlalchemy[asyncio]", "tqdm", "transformers", "tree_sitter", @@ -299,6 +265,7 @@ "scikit-learn", "scipy", "sentencepiece", + "sqlalchemy[asyncio]", "tqdm", "transformers", "tree_sitter", @@ -337,6 +304,86 @@ "scikit-learn", "scipy", "sentencepiece", + "sqlalchemy[asyncio]", + "tqdm", + "transformers", + "tree_sitter", + "uvicorn", + "sentence-transformers --no-deps", + "torch torchvision --index-url https://download.pytorch.org/whl/cpu" + ], + "kvant": [ + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "chromadb-client", + "datasets", + "emoji", + "faiss-cpu", + "fastapi", + "fire", + "httpx", + "langdetect", + "matplotlib", + "mcp", + "nltk", + "numpy", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pymongo", + "pypdf", + "pythainlp", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentencepiece", + "sqlalchemy[asyncio]", + "tqdm", + "transformers", + "tree_sitter", + "uvicorn", + "sentence-transformers --no-deps", + "torch torchvision --index-url https://download.pytorch.org/whl/cpu" + ], + "llama_api": [ + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "chromadb-client", + "datasets", + "emoji", + "fastapi", + "fire", + "httpx", + "langdetect", + "litellm", + "matplotlib", + "mcp", + "nltk", + "numpy", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pymongo", + "pypdf", + "pythainlp", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentencepiece", + "sqlalchemy[asyncio]", + "sqlite-vec", "tqdm", "transformers", "tree_sitter", @@ -380,6 +427,7 @@ "scipy", "sentence-transformers", "sentencepiece", + "sqlalchemy[asyncio]", "torch", "torchao==0.8.0", "torchvision", @@ -394,12 +442,11 @@ "aiosqlite", "blobfile", "chardet", - "emoji", + 
"datasets", "faiss-cpu", "fastapi", "fire", "httpx", - "langdetect", "matplotlib", "nltk", "numpy", @@ -411,15 +458,14 @@ "psycopg2-binary", "pymongo", "pypdf", - "pythainlp", "redis", "requests", "scikit-learn", "scipy", "sentencepiece", + "sqlalchemy[asyncio]", "tqdm", "transformers", - "tree_sitter", "uvicorn" ], "ollama": [ @@ -445,6 +491,7 @@ "opentelemetry-exporter-otlp-proto-http", "opentelemetry-sdk", "pandas", + "peft", "pillow", "psycopg2-binary", "pymongo", @@ -455,9 +502,12 @@ "scikit-learn", "scipy", "sentencepiece", + "sqlalchemy[asyncio]", + "torch", "tqdm", "transformers", "tree_sitter", + "trl", "uvicorn" ], "open-benchmark": [ @@ -491,6 +541,7 @@ "scikit-learn", "scipy", "sentencepiece", + "sqlalchemy[asyncio]", "sqlite-vec", "together", "tqdm", @@ -529,6 +580,7 @@ "scikit-learn", "scipy", "sentencepiece", + "sqlalchemy[asyncio]", "tqdm", "transformers", "tree_sitter", @@ -567,6 +619,7 @@ "scikit-learn", "scipy", "sentencepiece", + "sqlalchemy[asyncio]", "tqdm", "transformers", "tree_sitter", @@ -583,10 +636,11 @@ "fastapi", "fire", "httpx", + "litellm", "matplotlib", + "mcp", "nltk", "numpy", - "openai", "opentelemetry-exporter-otlp-proto-http", "opentelemetry-sdk", "pandas", @@ -599,9 +653,53 @@ "scikit-learn", "scipy", "sentencepiece", + "sqlalchemy[asyncio]", "tqdm", "transformers", - "uvicorn" + "uvicorn", + "sentence-transformers --no-deps", + "torch torchvision --index-url https://download.pytorch.org/whl/cpu" + ], + "starter": [ + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "chromadb-client", + "datasets", + "emoji", + "fastapi", + "fire", + "fireworks-ai", + "httpx", + "langdetect", + "litellm", + "matplotlib", + "mcp", + "nltk", + "numpy", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pymongo", + "pypdf", + "pythainlp", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentencepiece", + "sqlalchemy[asyncio]", + "sqlite-vec", + "tqdm", + "transformers", + "tree_sitter", + "uvicorn", + "sentence-transformers --no-deps", + "torch torchvision --index-url https://download.pytorch.org/whl/cpu" ], "tgi": [ "aiohttp", @@ -636,6 +734,7 @@ "scikit-learn", "scipy", "sentencepiece", + "sqlalchemy[asyncio]", "tqdm", "transformers", "tree_sitter", @@ -674,6 +773,7 @@ "scikit-learn", "scipy", "sentencepiece", + "sqlalchemy[asyncio]", "together", "tqdm", "transformers", @@ -713,6 +813,7 @@ "scikit-learn", "scipy", "sentencepiece", + "sqlalchemy[asyncio]", "sqlite-vec", "tqdm", "transformers", @@ -752,6 +853,7 @@ "scikit-learn", "scipy", "sentencepiece", + "sqlalchemy[asyncio]", "tqdm", "transformers", "tree_sitter", @@ -759,5 +861,44 @@ "vllm", "sentence-transformers --no-deps", "torch torchvision --index-url https://download.pytorch.org/whl/cpu" + ], + "watsonx": [ + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "datasets", + "emoji", + "faiss-cpu", + "fastapi", + "fire", + "httpx", + "ibm_watson_machine_learning", + "langdetect", + "matplotlib", + "mcp", + "nltk", + "numpy", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pymongo", + "pypdf", + "pythainlp", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentencepiece", + "sqlalchemy[asyncio]", + "tqdm", + "transformers", + "tree_sitter", + "uvicorn", + "sentence-transformers --no-deps", + "torch torchvision --index-url https://download.pytorch.org/whl/cpu" ] } diff --git 
a/llama_stack/templates/experimental-post-training/build.yaml b/llama_stack/templates/experimental-post-training/build.yaml index b4b5e2203..55cd189c6 100644 --- a/llama_stack/templates/experimental-post-training/build.yaml +++ b/llama_stack/templates/experimental-post-training/build.yaml @@ -13,9 +13,10 @@ distribution_spec: - inline::basic - inline::braintrust post_training: - - inline::torchtune + - inline::huggingface datasetio: - inline::localfs + - remote::huggingface telemetry: - inline::meta-reference agents: diff --git a/llama_stack/templates/experimental-post-training/run.yaml b/llama_stack/templates/experimental-post-training/run.yaml index 2ebdfe1aa..393cba41d 100644 --- a/llama_stack/templates/experimental-post-training/run.yaml +++ b/llama_stack/templates/experimental-post-training/run.yaml @@ -49,16 +49,24 @@ providers: type: sqlite namespace: null db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/experimental-post-training}/localfs_datasetio.db + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/huggingface}/huggingface_datasetio.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: {} post_training: - - provider_id: torchtune-post-training - provider_type: inline::torchtune - config: { + - provider_id: huggingface + provider_type: inline::huggingface + config: checkpoint_format: huggingface - } + distributed_backend: null + device: cpu agents: - provider_id: meta-reference provider_type: inline::meta-reference diff --git a/llama_stack/templates/fireworks/build.yaml b/llama_stack/templates/fireworks/build.yaml index 3907eba78..be19181c0 100644 --- a/llama_stack/templates/fireworks/build.yaml +++ b/llama_stack/templates/fireworks/build.yaml @@ -28,7 +28,9 @@ distribution_spec: - remote::brave-search - remote::tavily-search - remote::wolfram-alpha - - inline::code-interpreter - inline::rag-runtime - remote::model-context-protocol image_type: conda +additional_pip_packages: +- aiosqlite +- sqlalchemy[asyncio] diff --git a/llama_stack/templates/fireworks/fireworks.py b/llama_stack/templates/fireworks/fireworks.py index 449f18bf7..da68475e2 100644 --- a/llama_stack/templates/fireworks/fireworks.py +++ b/llama_stack/templates/fireworks/fireworks.py @@ -40,7 +40,6 @@ def get_distribution_template() -> DistributionTemplate: "remote::brave-search", "remote::tavily-search", "remote::wolfram-alpha", - "inline::code-interpreter", "inline::rag-runtime", "remote::model-context-protocol", ], @@ -90,10 +89,6 @@ def get_distribution_template() -> DistributionTemplate: toolgroup_id="builtin::rag", provider_id="rag-runtime", ), - ToolGroupInput( - toolgroup_id="builtin::code_interpreter", - provider_id="code-interpreter", - ), ] return DistributionTemplate( diff --git a/llama_stack/templates/fireworks/report.md b/llama_stack/templates/fireworks/report.md deleted file mode 100644 index 2c1ccc943..000000000 --- a/llama_stack/templates/fireworks/report.md +++ /dev/null @@ -1,46 +0,0 @@ -# Report for fireworks distribution - -## Supported Models -| Model Descriptor | fireworks | -|:---|:---| -| Llama-3-8B-Instruct | ❌ | -| Llama-3-70B-Instruct | ❌ | -| Llama3.1-8B-Instruct | ✅ | -| Llama3.1-70B-Instruct | ✅ | -| Llama3.1-405B-Instruct | ✅ | -| Llama3.2-1B-Instruct | ✅ | -| Llama3.2-3B-Instruct | ✅ | -| Llama3.2-11B-Vision-Instruct | ✅ | -| Llama3.2-90B-Vision-Instruct | ✅ | -| Llama3.3-70B-Instruct | ✅ | -| Llama-Guard-3-11B-Vision | 
✅ | -| Llama-Guard-3-1B | ❌ | -| Llama-Guard-3-8B | ✅ | -| Llama-Guard-2-8B | ❌ | - -## Inference -| Model | API | Capability | Test | Status | -|:----- |:-----|:-----|:-----|:-----| -| Llama-3.1-8B-Instruct | /chat_completion | streaming | test_text_chat_completion_streaming | ✅ | -| Llama-3.2-11B-Vision-Instruct | /chat_completion | streaming | test_image_chat_completion_streaming | ✅ | -| Llama-3.2-11B-Vision-Instruct | /chat_completion | non_streaming | test_image_chat_completion_non_streaming | ✅ | -| Llama-3.1-8B-Instruct | /chat_completion | non_streaming | test_text_chat_completion_non_streaming | ✅ | -| Llama-3.1-8B-Instruct | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_streaming | ✅ | -| Llama-3.1-8B-Instruct | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_non_streaming | ✅ | -| Llama-3.2-11B-Vision-Instruct | /chat_completion | log_probs | test_completion_log_probs_non_streaming | ✅ | -| Llama-3.2-11B-Vision-Instruct | /chat_completion | log_probs | test_completion_log_probs_streaming | ✅ | -| Llama-3.1-8B-Instruct | /completion | streaming | test_text_completion_streaming | ✅ | -| Llama-3.1-8B-Instruct | /completion | non_streaming | test_text_completion_non_streaming | ✅ | -| Llama-3.1-8B-Instruct | /completion | structured_output | test_text_completion_structured_output | ✅ | - -## Vector IO -| Provider | API | Capability | Test | Status | -|:-----|:-----|:-----|:-----|:-----| -| inline::faiss | /retrieve | | test_vector_db_retrieve | ✅ | - -## Agents -| Provider | API | Capability | Test | Status | -|:-----|:-----|:-----|:-----|:-----| -| inline::meta-reference | /create_agent_turn | rag | test_rag_agent | ✅ | -| inline::meta-reference | /create_agent_turn | custom_tool | test_custom_tool | ✅ | -| inline::meta-reference | /create_agent_turn | code_execution | test_code_interpreter_for_attachments | ✅ | diff --git a/llama_stack/templates/fireworks/run-with-safety.yaml b/llama_stack/templates/fireworks/run-with-safety.yaml index aa6209db6..41500f6f6 100644 --- a/llama_stack/templates/fireworks/run-with-safety.yaml +++ b/llama_stack/templates/fireworks/run-with-safety.yaml @@ -46,13 +46,16 @@ providers: type: sqlite namespace: null db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/fireworks/trace_store.db} + sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -102,9 +105,6 @@ providers: provider_type: remote::wolfram-alpha config: api_key: ${env.WOLFRAM_ALPHA_API_KEY:} - - provider_id: code-interpreter - provider_type: inline::code-interpreter - config: {} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} @@ -114,6 +114,9 @@ providers: metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/inference_store.db models: - metadata: {} model_id: accounts/fireworks/models/llama-v3p1-8b-instruct @@ 
-255,7 +258,5 @@ tool_groups: provider_id: wolfram-alpha - toolgroup_id: builtin::rag provider_id: rag-runtime -- toolgroup_id: builtin::code_interpreter - provider_id: code-interpreter server: port: 8321 diff --git a/llama_stack/templates/fireworks/run.yaml b/llama_stack/templates/fireworks/run.yaml index 834ec8260..b1fa03306 100644 --- a/llama_stack/templates/fireworks/run.yaml +++ b/llama_stack/templates/fireworks/run.yaml @@ -41,13 +41,16 @@ providers: type: sqlite namespace: null db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/fireworks/trace_store.db} + sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -97,9 +100,6 @@ providers: provider_type: remote::wolfram-alpha config: api_key: ${env.WOLFRAM_ALPHA_API_KEY:} - - provider_id: code-interpreter - provider_type: inline::code-interpreter - config: {} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} @@ -109,6 +109,9 @@ providers: metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/inference_store.db models: - metadata: {} model_id: accounts/fireworks/models/llama-v3p1-8b-instruct @@ -245,7 +248,5 @@ tool_groups: provider_id: wolfram-alpha - toolgroup_id: builtin::rag provider_id: rag-runtime -- toolgroup_id: builtin::code_interpreter - provider_id: code-interpreter server: port: 8321 diff --git a/llama_stack/templates/groq/build.yaml b/llama_stack/templates/groq/build.yaml index 3263ce83b..819df22f0 100644 --- a/llama_stack/templates/groq/build.yaml +++ b/llama_stack/templates/groq/build.yaml @@ -24,6 +24,8 @@ distribution_spec: tool_runtime: - remote::brave-search - remote::tavily-search - - inline::code-interpreter - inline::rag-runtime image_type: conda +additional_pip_packages: +- aiosqlite +- sqlalchemy[asyncio] diff --git a/llama_stack/templates/groq/groq.py b/llama_stack/templates/groq/groq.py index 7999f95cb..4e52aa42d 100644 --- a/llama_stack/templates/groq/groq.py +++ b/llama_stack/templates/groq/groq.py @@ -33,7 +33,6 @@ def get_distribution_template() -> DistributionTemplate: "tool_runtime": [ "remote::brave-search", "remote::tavily-search", - "inline::code-interpreter", "inline::rag-runtime", ], } @@ -72,10 +71,6 @@ def get_distribution_template() -> DistributionTemplate: toolgroup_id="builtin::rag", provider_id="rag-runtime", ), - ToolGroupInput( - toolgroup_id="builtin::code_interpreter", - provider_id="code-interpreter", - ), ] return DistributionTemplate( diff --git a/llama_stack/templates/groq/run.yaml b/llama_stack/templates/groq/run.yaml index f557e64fd..db7ebffee 100644 --- a/llama_stack/templates/groq/run.yaml +++ b/llama_stack/templates/groq/run.yaml @@ -41,13 +41,16 @@ providers: type: sqlite namespace: null db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/agents_store.db + responses_store: + type: sqlite + db_path: 
${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/groq/trace_store.db} + sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -93,15 +96,15 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:} max_results: 3 - - provider_id: code-interpreter - provider_type: inline::code-interpreter - config: {} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/inference_store.db models: - metadata: {} model_id: groq/llama3-8b-8192 @@ -158,6 +161,16 @@ models: provider_id: groq provider_model_id: groq/llama-4-scout-17b-16e-instruct model_type: llm +- metadata: {} + model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct + provider_id: groq + provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: groq + provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct + model_type: llm - metadata: {} model_id: groq/llama-4-maverick-17b-128e-instruct provider_id: groq @@ -168,6 +181,16 @@ models: provider_id: groq provider_model_id: groq/llama-4-maverick-17b-128e-instruct model_type: llm +- metadata: {} + model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct + provider_id: groq + provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct + provider_id: groq + provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct + model_type: llm - metadata: embedding_dimension: 384 model_id: all-MiniLM-L6-v2 @@ -183,7 +206,5 @@ tool_groups: provider_id: tavily-search - toolgroup_id: builtin::rag provider_id: rag-runtime -- toolgroup_id: builtin::code_interpreter - provider_id: code-interpreter server: port: 8321 diff --git a/llama_stack/templates/hf-endpoint/build.yaml b/llama_stack/templates/hf-endpoint/build.yaml index c2eaaa05b..8ede83694 100644 --- a/llama_stack/templates/hf-endpoint/build.yaml +++ b/llama_stack/templates/hf-endpoint/build.yaml @@ -26,7 +26,9 @@ distribution_spec: tool_runtime: - remote::brave-search - remote::tavily-search - - inline::code-interpreter - inline::rag-runtime - remote::model-context-protocol image_type: conda +additional_pip_packages: +- aiosqlite +- sqlalchemy[asyncio] diff --git a/llama_stack/templates/hf-endpoint/hf_endpoint.py b/llama_stack/templates/hf-endpoint/hf_endpoint.py index 53dc9d38f..69e037299 100644 --- a/llama_stack/templates/hf-endpoint/hf_endpoint.py +++ b/llama_stack/templates/hf-endpoint/hf_endpoint.py @@ -32,7 +32,6 @@ def get_distribution_template() -> DistributionTemplate: "tool_runtime": [ "remote::brave-search", "remote::tavily-search", - "inline::code-interpreter", "inline::rag-runtime", "remote::model-context-protocol", ], @@ -79,10 +78,6 @@ def get_distribution_template() -> DistributionTemplate: toolgroup_id="builtin::rag", provider_id="rag-runtime", ), - ToolGroupInput( - 
toolgroup_id="builtin::code_interpreter", - provider_id="code-interpreter", - ), ] return DistributionTemplate( diff --git a/llama_stack/templates/hf-endpoint/run-with-safety.yaml b/llama_stack/templates/hf-endpoint/run-with-safety.yaml index 14753e08b..15cf2a47f 100644 --- a/llama_stack/templates/hf-endpoint/run-with-safety.yaml +++ b/llama_stack/templates/hf-endpoint/run-with-safety.yaml @@ -46,13 +46,16 @@ providers: type: sqlite namespace: null db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/hf-endpoint/trace_store.db} + sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -98,9 +101,6 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:} max_results: 3 - - provider_id: code-interpreter - provider_type: inline::code-interpreter - config: {} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} @@ -110,6 +110,9 @@ providers: metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} @@ -135,7 +138,5 @@ tool_groups: provider_id: tavily-search - toolgroup_id: builtin::rag provider_id: rag-runtime -- toolgroup_id: builtin::code_interpreter - provider_id: code-interpreter server: port: 8321 diff --git a/llama_stack/templates/hf-endpoint/run.yaml b/llama_stack/templates/hf-endpoint/run.yaml index 706ba9122..428edf9a2 100644 --- a/llama_stack/templates/hf-endpoint/run.yaml +++ b/llama_stack/templates/hf-endpoint/run.yaml @@ -41,13 +41,16 @@ providers: type: sqlite namespace: null db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/hf-endpoint/trace_store.db} + sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -93,9 +96,6 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:} max_results: 3 - - provider_id: code-interpreter - provider_type: inline::code-interpreter - config: {} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} @@ -105,6 +105,9 @@ providers: metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} @@ -125,7 +128,5 @@ tool_groups: provider_id: tavily-search - toolgroup_id: 
builtin::rag provider_id: rag-runtime -- toolgroup_id: builtin::code_interpreter - provider_id: code-interpreter server: port: 8321 diff --git a/llama_stack/templates/hf-serverless/build.yaml b/llama_stack/templates/hf-serverless/build.yaml index c0cc1e2c2..d0752db9a 100644 --- a/llama_stack/templates/hf-serverless/build.yaml +++ b/llama_stack/templates/hf-serverless/build.yaml @@ -27,7 +27,9 @@ distribution_spec: tool_runtime: - remote::brave-search - remote::tavily-search - - inline::code-interpreter - inline::rag-runtime - remote::model-context-protocol image_type: conda +additional_pip_packages: +- aiosqlite +- sqlalchemy[asyncio] diff --git a/llama_stack/templates/hf-serverless/hf_serverless.py b/llama_stack/templates/hf-serverless/hf_serverless.py index ad8a72012..ecfe2a167 100644 --- a/llama_stack/templates/hf-serverless/hf_serverless.py +++ b/llama_stack/templates/hf-serverless/hf_serverless.py @@ -32,7 +32,6 @@ def get_distribution_template() -> DistributionTemplate: "tool_runtime": [ "remote::brave-search", "remote::tavily-search", - "inline::code-interpreter", "inline::rag-runtime", "remote::model-context-protocol", ], @@ -80,10 +79,6 @@ def get_distribution_template() -> DistributionTemplate: toolgroup_id="builtin::rag", provider_id="rag-runtime", ), - ToolGroupInput( - toolgroup_id="builtin::code_interpreter", - provider_id="code-interpreter", - ), ] return DistributionTemplate( diff --git a/llama_stack/templates/hf-serverless/run-with-safety.yaml b/llama_stack/templates/hf-serverless/run-with-safety.yaml index bf26fe507..ab461c6c3 100644 --- a/llama_stack/templates/hf-serverless/run-with-safety.yaml +++ b/llama_stack/templates/hf-serverless/run-with-safety.yaml @@ -46,13 +46,16 @@ providers: type: sqlite namespace: null db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/hf-serverless/trace_store.db} + sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -98,9 +101,6 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:} max_results: 3 - - provider_id: code-interpreter - provider_type: inline::code-interpreter - config: {} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} @@ -110,6 +110,9 @@ providers: metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} @@ -135,7 +138,5 @@ tool_groups: provider_id: tavily-search - toolgroup_id: builtin::rag provider_id: rag-runtime -- toolgroup_id: builtin::code_interpreter - provider_id: code-interpreter server: port: 8321 diff --git a/llama_stack/templates/hf-serverless/run.yaml b/llama_stack/templates/hf-serverless/run.yaml index cc973b8de..d238506fb 100644 --- a/llama_stack/templates/hf-serverless/run.yaml +++ b/llama_stack/templates/hf-serverless/run.yaml @@ -41,13 +41,16 @@ providers: type: sqlite namespace: null db_path: 
${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/hf-serverless/trace_store.db} + sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -93,9 +96,6 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:} max_results: 3 - - provider_id: code-interpreter - provider_type: inline::code-interpreter - config: {} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} @@ -105,6 +105,9 @@ providers: metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} @@ -125,7 +128,5 @@ tool_groups: provider_id: tavily-search - toolgroup_id: builtin::rag provider_id: rag-runtime -- toolgroup_id: builtin::code_interpreter - provider_id: code-interpreter server: port: 8321 diff --git a/llama_stack/templates/kvant/__init__.py b/llama_stack/templates/kvant/__init__.py new file mode 100644 index 000000000..61706f7f6 --- /dev/null +++ b/llama_stack/templates/kvant/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from .kvant import get_distribution_template # noqa: F401 diff --git a/llama_stack/templates/kvant/build.yaml b/llama_stack/templates/kvant/build.yaml new file mode 100644 index 000000000..25afc1f4d --- /dev/null +++ b/llama_stack/templates/kvant/build.yaml @@ -0,0 +1,35 @@ +version: '2' +distribution_spec: + description: distribution for kvant cloud + providers: + inference: + - remote::vllm + - inline::sentence-transformers + vector_io: + - inline::faiss + - remote::chromadb + - remote::pgvector + safety: + - inline::llama-guard + agents: + - inline::meta-reference + telemetry: + - inline::meta-reference + eval: + - inline::meta-reference + datasetio: + - remote::huggingface + - inline::localfs + scoring: + - inline::basic + - inline::llm-as-judge + - inline::braintrust + tool_runtime: + - remote::brave-search + - remote::tavily-search + - remote::wolfram-alpha + - inline::rag-runtime + - remote::model-context-protocol +image_type: conda +additional_pip_packages: +- sqlalchemy[asyncio] diff --git a/llama_stack/templates/kvant/kvant.py b/llama_stack/templates/kvant/kvant.py new file mode 100644 index 000000000..44cfc7016 --- /dev/null +++ b/llama_stack/templates/kvant/kvant.py @@ -0,0 +1,136 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from pathlib import Path + +from llama_stack.apis.models.models import ModelType +from llama_stack.distribution.datatypes import ( + ModelInput, + Provider, + ShieldInput, + ToolGroupInput, +) +from llama_stack.providers.inline.inference.sentence_transformers import ( + SentenceTransformersInferenceConfig, +) +from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig +from llama_stack.providers.remote.inference.passthrough.config import ( + PassthroughImplConfig, +) +from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry +from llama_stack.templates.template import DistributionTemplate, RunConfigSettings + + +def get_distribution_template() -> DistributionTemplate: + providers = { + "inference": ["remote::openai", "inline::sentence-transformers"], + "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"], + "safety": ["inline::llama-guard"], + "agents": ["inline::meta-reference"], + "telemetry": ["inline::meta-reference"], + "eval": ["inline::meta-reference"], + "datasetio": ["remote::huggingface", "inline::localfs"], + "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"], + "tool_runtime": [ + "remote::brave-search", + "remote::tavily-search", + "remote::wolfram-alpha", + "inline::rag-runtime", + "remote::model-context-protocol", + ], + } + + name = "kvant" + + inference_provider = Provider( + provider_id="openai", + provider_type="remote::openai", + config=PassthroughImplConfig.sample_run_config(), + ) + embedding_provider = Provider( + provider_id="sentence-transformers", + provider_type="inline::sentence-transformers", + config=SentenceTransformersInferenceConfig.sample_run_config(), + ) + vector_io_provider = Provider( + provider_id="faiss", + provider_type="inline::faiss", + config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), + ) + + default_models = [ + ModelInput( + metadata={}, + model_id="inference-llama4-maverick", + provider_id="openai", + provider_model_id="inference-llama4-maverick", + model_type=ModelType.llm, + ), + ] + + embedding_model = ModelInput( + model_id="all-MiniLM-L6-v2", + provider_id="sentence-transformers", + model_type=ModelType.embedding, + metadata={ + "embedding_dimension": 384, + }, + ) + default_tool_groups = [ + ToolGroupInput( + toolgroup_id="builtin::websearch", + provider_id="tavily-search", + ), + ToolGroupInput( + toolgroup_id="builtin::wolfram_alpha", + provider_id="wolfram-alpha", + ), + ToolGroupInput( + toolgroup_id="builtin::rag", + provider_id="rag-runtime", + ), + ] + + return DistributionTemplate( + name=name, + distro_type="self_hosted", + description="Use Passthrough hosted llama-stack endpoint for LLM inference", + container_image=None, + providers=providers, + available_models_by_provider={ + "openai": [ + ProviderModelEntry( + provider_model_id="inference-llama4-maverick", + model_type=ModelType.llm, + ), + ], + }, + run_configs={ + "run.yaml": RunConfigSettings( + provider_overrides={ + "inference": [inference_provider, embedding_provider], + "vector_io": [vector_io_provider], + }, + default_models=default_models + [embedding_model], + default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")], + default_tool_groups=default_tool_groups, + ), + }, + run_config_env_vars={ + "LLAMA_STACK_PORT": ( + "8321", + "Port for the Llama Stack distribution server", + ), + "OPENAI_API_KEY": ( + "", + "kvant maas API Key", + ), + "OPENAI_BASE_URL": ( + "https://maas.kvant.cloud", + "kvant maas URL", + ), + }, + ) diff 
--git a/llama_stack/templates/kvant/run.yaml b/llama_stack/templates/kvant/run.yaml new file mode 100644 index 000000000..99fb6f7fa --- /dev/null +++ b/llama_stack/templates/kvant/run.yaml @@ -0,0 +1,170 @@ +version: '2' +image_name: kvant +apis: +- agents +- datasetio +- eval +- inference +- safety +- scoring +- telemetry +- tool_runtime +- vector_io +providers: + inference: + - provider_id: kvant + provider_type: remote::vllm + config: + url: ${env.VLLM_URL:https://maas.ai-2.kvant.cloud/v1} + max_tokens: ${env.VLLM_MAX_TOKENS:400000} + api_token: ${env.VLLM_API_TOKEN:fake} + tls_verify: ${env.VLLM_TLS_VERIFY:true} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + config: {} + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/kvant}/faiss_store.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/kvant}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/kvant}/responses_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + service_name: ${env.OTEL_SERVICE_NAME:} + sinks: ${env.TELEMETRY_SINKS:console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/kvant}/trace_store.db + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/kvant}/meta_reference_eval.db + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/kvant}/huggingface_datasetio.db + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/kvant}/localfs_datasetio.db + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:} + max_results: 3 + - provider_id: wolfram-alpha + provider_type: remote::wolfram-alpha + config: + api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} +metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/kvant}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/kvant}/inference_store.db +models: +- metadata: {} + model_id: Llama-4-Maverick-17B-128E-Instruct-FP8 + provider_id: kvant + provider_model_id: inference-llama4-maverick + model_type: llm +- metadata: + 
embedding_dimension: 1024 + context_length: 8192 + model_id: inference-bge-m3 + provider_id: kvant + model_type: embedding +- metadata: + embedding_dimension: 384 + model_id: all-MiniLM-L6-v2 + provider_id: sentence-transformers + model_type: embedding +shields: +- shield_id: meta-llama/Llama-Guard-3-8B +vector_dbs: [] +# - vector_db_id: test-bge +# embedding_model: inference-bge-m3 +# embedding_dimension: 1024 +# provider_id: faiss +# - vector_db_id: test-MiniLM-L6-v2 +# embedding_model: all-MiniLM-L6-v2 +# embedding_dimension: 384 +# provider_id: faiss +datasets: [] +scoring_fns: [] +benchmarks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: tavily-search +- toolgroup_id: builtin::wolfram_alpha + provider_id: wolfram-alpha +- toolgroup_id: builtin::rag + provider_id: rag-runtime +server: + port: 8321 + auth: + provider_type: "oauth2_token" + config: + jwks: + introspection: + url: ${env.KEYCLOAK_INSTROSPECT:https://iam.phoenix-systems.ch/realms/kvant/protocol/openid-connect/token/introspect} + client_id: ${env.KEYCLOAK_CLIENT_ID:llama-stack} + client_secret: ${env.KEYCLOAK_CLIENT_SECRET} + claims_mapping: + sub: projects + scope: roles + #groups: teams + customer/id: teams + aud: namespaces diff --git a/llama_stack/templates/llama_api/__init__.py b/llama_stack/templates/llama_api/__init__.py new file mode 100644 index 000000000..57cc75730 --- /dev/null +++ b/llama_stack/templates/llama_api/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from .llama_api import get_distribution_template # noqa: F401 diff --git a/llama_stack/templates/dev/build.yaml b/llama_stack/templates/llama_api/build.yaml similarity index 83% rename from llama_stack/templates/dev/build.yaml rename to llama_stack/templates/llama_api/build.yaml index 726ebccca..857e5f014 100644 --- a/llama_stack/templates/dev/build.yaml +++ b/llama_stack/templates/llama_api/build.yaml @@ -3,11 +3,7 @@ distribution_spec: description: Distribution for running e2e tests in CI providers: inference: - - remote::openai - - remote::fireworks - - remote::anthropic - - remote::gemini - - remote::groq + - remote::llama-openai-compat - inline::sentence-transformers vector_io: - inline::sqlite-vec @@ -31,7 +27,9 @@ distribution_spec: tool_runtime: - remote::brave-search - remote::tavily-search - - inline::code-interpreter - inline::rag-runtime - remote::model-context-protocol image_type: conda +additional_pip_packages: +- aiosqlite +- sqlalchemy[asyncio] diff --git a/llama_stack/templates/llama_api/llama_api.py b/llama_stack/templates/llama_api/llama_api.py new file mode 100644 index 000000000..b4641b9da --- /dev/null +++ b/llama_stack/templates/llama_api/llama_api.py @@ -0,0 +1,153 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
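The kvant `run.yaml` above protects the server with `oauth2_token` auth backed by Keycloak token introspection. A minimal sketch of calling such a deployment, assuming a confidential Keycloak client with the client-credentials grant enabled and that the stack exposes `/v1/models` at the address shown (both are assumptions, not part of this change):

```sh
# Sketch only: token endpoint derived from the realm in the introspection URL
# above; the client secret and the target URL are placeholders.
TOKEN=$(curl -s -X POST \
  "https://iam.phoenix-systems.ch/realms/kvant/protocol/openid-connect/token" \
  -d grant_type=client_credentials \
  -d client_id=llama-stack \
  -d "client_secret=$KEYCLOAK_CLIENT_SECRET" | jq -r .access_token)

curl -H "Authorization: Bearer $TOKEN" http://localhost:8321/v1/models
```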
+ + +from llama_stack.apis.models.models import ModelType +from llama_stack.distribution.datatypes import ( + ModelInput, + Provider, + ShieldInput, + ToolGroupInput, +) +from llama_stack.providers.inline.inference.sentence_transformers import ( + SentenceTransformersInferenceConfig, +) +from llama_stack.providers.inline.vector_io.sqlite_vec.config import ( + SQLiteVectorIOConfig, +) +from llama_stack.providers.remote.inference.llama_openai_compat.config import ( + LlamaCompatConfig, +) +from llama_stack.providers.remote.inference.llama_openai_compat.models import ( + MODEL_ENTRIES as LLLAMA_MODEL_ENTRIES, +) +from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig +from llama_stack.providers.remote.vector_io.pgvector.config import ( + PGVectorVectorIOConfig, +) +from llama_stack.templates.template import ( + DistributionTemplate, + RunConfigSettings, + get_model_registry, +) + + +def get_inference_providers() -> tuple[list[Provider], list[ModelInput]]: + # in this template, we allow each API key to be optional + providers = [ + ( + "llama-openai-compat", + LLLAMA_MODEL_ENTRIES, + LlamaCompatConfig.sample_run_config(api_key="${env.LLAMA_API_KEY:}"), + ), + ] + inference_providers = [] + available_models = {} + for provider_id, model_entries, config in providers: + inference_providers.append( + Provider( + provider_id=provider_id, + provider_type=f"remote::{provider_id}", + config=config, + ) + ) + available_models[provider_id] = model_entries + return inference_providers, available_models + + +def get_distribution_template() -> DistributionTemplate: + inference_providers, available_models = get_inference_providers() + providers = { + "inference": ([p.provider_type for p in inference_providers] + ["inline::sentence-transformers"]), + "vector_io": ["inline::sqlite-vec", "remote::chromadb", "remote::pgvector"], + "safety": ["inline::llama-guard"], + "agents": ["inline::meta-reference"], + "telemetry": ["inline::meta-reference"], + "eval": ["inline::meta-reference"], + "datasetio": ["remote::huggingface", "inline::localfs"], + "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"], + "tool_runtime": [ + "remote::brave-search", + "remote::tavily-search", + "inline::rag-runtime", + "remote::model-context-protocol", + ], + } + name = "llama_api" + + vector_io_providers = [ + Provider( + provider_id="sqlite-vec", + provider_type="inline::sqlite-vec", + config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), + ), + Provider( + provider_id="${env.ENABLE_CHROMADB+chromadb}", + provider_type="remote::chromadb", + config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:}"), + ), + Provider( + provider_id="${env.ENABLE_PGVECTOR+pgvector}", + provider_type="remote::pgvector", + config=PGVectorVectorIOConfig.sample_run_config( + db="${env.PGVECTOR_DB:}", + user="${env.PGVECTOR_USER:}", + password="${env.PGVECTOR_PASSWORD:}", + ), + ), + ] + embedding_provider = Provider( + provider_id="sentence-transformers", + provider_type="inline::sentence-transformers", + config=SentenceTransformersInferenceConfig.sample_run_config(), + ) + + default_tool_groups = [ + ToolGroupInput( + toolgroup_id="builtin::websearch", + provider_id="tavily-search", + ), + ToolGroupInput( + toolgroup_id="builtin::rag", + provider_id="rag-runtime", + ), + ] + embedding_model = ModelInput( + model_id="all-MiniLM-L6-v2", + provider_id=embedding_provider.provider_id, + model_type=ModelType.embedding, + metadata={ + "embedding_dimension": 384, + }, + 
) + + default_models = get_model_registry(available_models) + return DistributionTemplate( + name=name, + distro_type="self_hosted", + description="Distribution for running e2e tests in CI", + container_image=None, + template_path=None, + providers=providers, + available_models_by_provider=available_models, + run_configs={ + "run.yaml": RunConfigSettings( + provider_overrides={ + "inference": inference_providers + [embedding_provider], + "vector_io": vector_io_providers, + }, + default_models=default_models + [embedding_model], + default_tool_groups=default_tool_groups, + default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")], + ), + }, + run_config_env_vars={ + "LLAMA_STACK_PORT": ( + "8321", + "Port for the Llama Stack distribution server", + ), + }, + ) diff --git a/llama_stack/templates/llama_api/run.yaml b/llama_stack/templates/llama_api/run.yaml new file mode 100644 index 000000000..a7f2b0769 --- /dev/null +++ b/llama_stack/templates/llama_api/run.yaml @@ -0,0 +1,168 @@ +version: '2' +image_name: llama_api +apis: +- agents +- datasetio +- eval +- inference +- safety +- scoring +- telemetry +- tool_runtime +- vector_io +providers: + inference: + - provider_id: llama-openai-compat + provider_type: remote::llama-openai-compat + config: + openai_compat_api_base: https://api.llama.com/compat/v1/ + api_key: ${env.LLAMA_API_KEY:} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + config: {} + vector_io: + - provider_id: sqlite-vec + provider_type: inline::sqlite-vec + config: + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/sqlite_vec.db + - provider_id: ${env.ENABLE_CHROMADB+chromadb} + provider_type: remote::chromadb + config: + url: ${env.CHROMADB_URL:} + - provider_id: ${env.ENABLE_PGVECTOR+pgvector} + provider_type: remote::pgvector + config: + host: ${env.PGVECTOR_HOST:localhost} + port: ${env.PGVECTOR_PORT:5432} + db: ${env.PGVECTOR_DB:} + user: ${env.PGVECTOR_USER:} + password: ${env.PGVECTOR_PASSWORD:} + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/responses_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + service_name: ${env.OTEL_SERVICE_NAME:} + sinks: ${env.TELEMETRY_SINKS:console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/trace_store.db + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/meta_reference_eval.db + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/huggingface_datasetio.db + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/localfs_datasetio.db + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + 
config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:} + max_results: 3 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} +metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/inference_store.db +models: +- metadata: {} + model_id: Llama-3.3-70B-Instruct + provider_id: llama-openai-compat + provider_model_id: Llama-3.3-70B-Instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.3-70B-Instruct + provider_id: llama-openai-compat + provider_model_id: Llama-3.3-70B-Instruct + model_type: llm +- metadata: {} + model_id: Llama-4-Scout-17B-16E-Instruct-FP8 + provider_id: llama-openai-compat + provider_model_id: Llama-4-Scout-17B-16E-Instruct-FP8 + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: llama-openai-compat + provider_model_id: Llama-4-Scout-17B-16E-Instruct-FP8 + model_type: llm +- metadata: {} + model_id: Llama-4-Maverick-17B-128E-Instruct-FP8 + provider_id: llama-openai-compat + provider_model_id: Llama-4-Maverick-17B-128E-Instruct-FP8 + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct + provider_id: llama-openai-compat + provider_model_id: Llama-4-Maverick-17B-128E-Instruct-FP8 + model_type: llm +- metadata: + embedding_dimension: 384 + model_id: all-MiniLM-L6-v2 + provider_id: sentence-transformers + model_type: embedding +shields: +- shield_id: meta-llama/Llama-Guard-3-8B +vector_dbs: [] +datasets: [] +scoring_fns: [] +benchmarks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: tavily-search +- toolgroup_id: builtin::rag + provider_id: rag-runtime +server: + port: 8321 diff --git a/llama_stack/templates/meta-reference-gpu/build.yaml b/llama_stack/templates/meta-reference-gpu/build.yaml index b9130fc7d..53ad411e3 100644 --- a/llama_stack/templates/meta-reference-gpu/build.yaml +++ b/llama_stack/templates/meta-reference-gpu/build.yaml @@ -26,7 +26,9 @@ distribution_spec: tool_runtime: - remote::brave-search - remote::tavily-search - - inline::code-interpreter - inline::rag-runtime - remote::model-context-protocol image_type: conda +additional_pip_packages: +- aiosqlite +- sqlalchemy[asyncio] diff --git a/llama_stack/templates/meta-reference-gpu/doc_template.md b/llama_stack/templates/meta-reference-gpu/doc_template.md index a174331b4..2ca6793d7 100644 --- a/llama_stack/templates/meta-reference-gpu/doc_template.md +++ b/llama_stack/templates/meta-reference-gpu/doc_template.md @@ -69,6 +69,7 @@ LLAMA_STACK_PORT=8321 docker run \ -it \ --pull always \ + --gpu all \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ~/.llama:/root/.llama \ llamastack/distribution-{{ name }} \ @@ -82,6 +83,7 @@ If you are using Llama Stack Safety / Shield APIs, use: docker run \ -it \ --pull always \ + --gpu all \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ~/.llama:/root/.llama \ llamastack/distribution-{{ name }} \ diff --git 
a/llama_stack/templates/meta-reference-gpu/meta_reference.py b/llama_stack/templates/meta-reference-gpu/meta_reference.py index 8ba9fadca..95d126095 100644 --- a/llama_stack/templates/meta-reference-gpu/meta_reference.py +++ b/llama_stack/templates/meta-reference-gpu/meta_reference.py @@ -36,7 +36,6 @@ def get_distribution_template() -> DistributionTemplate: "tool_runtime": [ "remote::brave-search", "remote::tavily-search", - "inline::code-interpreter", "inline::rag-runtime", "remote::model-context-protocol", ], @@ -86,10 +85,6 @@ def get_distribution_template() -> DistributionTemplate: toolgroup_id="builtin::rag", provider_id="rag-runtime", ), - ToolGroupInput( - toolgroup_id="builtin::code_interpreter", - provider_id="code-interpreter", - ), ] return DistributionTemplate( diff --git a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml index 63177ab09..2b751a514 100644 --- a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml +++ b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml @@ -56,13 +56,16 @@ providers: type: sqlite namespace: null db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/meta-reference-gpu/trace_store.db} + sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -108,9 +111,6 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:} max_results: 3 - - provider_id: code-interpreter - provider_type: inline::code-interpreter - config: {} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} @@ -120,6 +120,9 @@ providers: metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} @@ -145,7 +148,5 @@ tool_groups: provider_id: tavily-search - toolgroup_id: builtin::rag provider_id: rag-runtime -- toolgroup_id: builtin::code_interpreter - provider_id: code-interpreter server: port: 8321 diff --git a/llama_stack/templates/meta-reference-gpu/run.yaml b/llama_stack/templates/meta-reference-gpu/run.yaml index 380d83060..a24c5fec5 100644 --- a/llama_stack/templates/meta-reference-gpu/run.yaml +++ b/llama_stack/templates/meta-reference-gpu/run.yaml @@ -46,13 +46,16 @@ providers: type: sqlite namespace: null db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/meta-reference-gpu/trace_store.db} + sqlite_db_path: 
${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -98,9 +101,6 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:} max_results: 3 - - provider_id: code-interpreter - provider_type: inline::code-interpreter - config: {} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} @@ -110,6 +110,9 @@ providers: metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} @@ -130,7 +133,5 @@ tool_groups: provider_id: tavily-search - toolgroup_id: builtin::rag provider_id: rag-runtime -- toolgroup_id: builtin::code_interpreter - provider_id: code-interpreter server: port: 8321 diff --git a/llama_stack/templates/nvidia/build.yaml b/llama_stack/templates/nvidia/build.yaml index f99ff6c81..6bd8a0100 100644 --- a/llama_stack/templates/nvidia/build.yaml +++ b/llama_stack/templates/nvidia/build.yaml @@ -1,6 +1,6 @@ version: '2' distribution_spec: - description: Use NVIDIA NIM for running LLM inference and safety + description: Use NVIDIA NIM for running LLM inference, evaluation and safety providers: inference: - remote::nvidia @@ -13,13 +13,17 @@ distribution_spec: telemetry: - inline::meta-reference eval: - - inline::meta-reference + - remote::nvidia post_training: - remote::nvidia datasetio: - inline::localfs + - remote::nvidia scoring: - inline::basic tool_runtime: - inline::rag-runtime image_type: conda +additional_pip_packages: +- aiosqlite +- sqlalchemy[asyncio] diff --git a/llama_stack/templates/nvidia/doc_template.md b/llama_stack/templates/nvidia/doc_template.md index da95227d8..50c96802f 100644 --- a/llama_stack/templates/nvidia/doc_template.md +++ b/llama_stack/templates/nvidia/doc_template.md @@ -25,14 +25,84 @@ The following models are available by default: {% endif %} -### Prerequisite: API Keys +## Prerequisites +### NVIDIA API Keys -Make sure you have access to a NVIDIA API Key. You can get one by visiting [https://build.nvidia.com/](https://build.nvidia.com/). +Make sure you have access to a NVIDIA API Key. You can get one by visiting [https://build.nvidia.com/](https://build.nvidia.com/). Use this key for the `NVIDIA_API_KEY` environment variable. +### Deploy NeMo Microservices Platform +The NVIDIA NeMo microservices platform supports end-to-end microservice deployment of a complete AI flywheel on your Kubernetes cluster through the NeMo Microservices Helm Chart. Please reference the [NVIDIA NeMo Microservices documentation](https://docs.nvidia.com/nemo/microservices/latest/about/index.html) for platform prerequisites and instructions to install and deploy the platform. + +## Supported Services +Each Llama Stack API corresponds to a specific NeMo microservice. The core microservices (Customizer, Evaluator, Guardrails) are exposed by the same endpoint. The platform components (Data Store) are each exposed by separate endpoints. + +### Inference: NVIDIA NIM +NVIDIA NIM is used for running inference with registered models. There are two ways to access NVIDIA NIMs: + 1. Hosted (default): Preview APIs hosted at https://integrate.api.nvidia.com (Requires an API key) + 2. Self-hosted: NVIDIA NIMs that run on your own infrastructure. 
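For example (a sketch; the self-hosted URL is a placeholder), the two access modes map onto the `NVIDIA_API_KEY` and `NVIDIA_BASE_URL` variables this template reads:

```sh
# Hosted NIMs (default base URL https://integrate.api.nvidia.com): API key only.
export NVIDIA_API_KEY=<key from https://build.nvidia.com/>

# Self-hosted NIMs: point the stack at your own deployment instead.
export NVIDIA_BASE_URL=https://nim-proxy.example.internal
```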
+ +The deployed platform includes the NIM Proxy microservice, which is the service that provides access to your NIMs (for example, to run inference on a model). Set the `NVIDIA_BASE_URL` environment variable to use your NVIDIA NIM Proxy deployment. + +### Datasetio API: NeMo Data Store +The NeMo Data Store microservice serves as the default file storage solution for the NeMo microservices platform. It exposes APIs compatible with the Hugging Face Hub client (`HfApi`), so you can use the client to interact with Data Store. The `NVIDIA_DATASETS_URL` environment variable should point to your NeMo Data Store endpoint. + +See the [NVIDIA Datasetio docs](/llama_stack/providers/remote/datasetio/nvidia/README.md) for supported features and example usage. + +### Eval API: NeMo Evaluator +The NeMo Evaluator microservice supports evaluation of LLMs. Launching an Evaluation job with NeMo Evaluator requires an Evaluation Config (an object that contains metadata needed by the job). A Llama Stack Benchmark maps to an Evaluation Config, so registering a Benchmark creates an Evaluation Config in NeMo Evaluator. The `NVIDIA_EVALUATOR_URL` environment variable should point to your NeMo Microservices endpoint. + +See the [NVIDIA Eval docs](/llama_stack/providers/remote/eval/nvidia/README.md) for supported features and example usage. + +### Post-Training API: NeMo Customizer +The NeMo Customizer microservice supports fine-tuning models. You can reference [this list of supported models](/llama_stack/providers/remote/post_training/nvidia/models.py) that can be fine-tuned using Llama Stack. The `NVIDIA_CUSTOMIZER_URL` environment variable should point to your NeMo Microservices endpoint. + +See the [NVIDIA Post-Training docs](/llama_stack/providers/remote/post_training/nvidia/README.md) for supported features and example usage. + +### Safety API: NeMo Guardrails +The NeMo Guardrails microservice sits between your application and the LLM, and adds checks and content moderation to a model. The `GUARDRAILS_SERVICE_URL` environment variable should point to your NeMo Microservices endpoint. + +See the NVIDIA Safety docs for supported features and example usage. + +## Deploying models +In order to use a registered model with the Llama Stack APIs, ensure the corresponding NIM is deployed to your environment. For example, you can use the NIM Proxy microservice to deploy `meta/llama-3.2-1b-instruct`. + +Note: For improved inference speeds, we need to use NIM with the `fast_outlines` guided decoding system (specified in the request body). This is the default if you deployed the platform with the NeMo Microservices Helm Chart. +```sh +# URL to NeMo NIM Proxy service +export NEMO_URL="http://nemo.test" + +curl --location "$NEMO_URL/v1/deployment/model-deployments" \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "name": "llama-3.2-1b-instruct", + "namespace": "meta", + "config": { + "model": "meta/llama-3.2-1b-instruct", + "nim_deployment": { + "image_name": "nvcr.io/nim/meta/llama-3.2-1b-instruct", + "image_tag": "1.8.3", + "pvc_size": "25Gi", + "gpu": 1, + "additional_envs": { + "NIM_GUIDED_DECODING_BACKEND": "fast_outlines" + } + } + } + }' +``` +This NIM deployment should take approximately 10 minutes to go live. [See the docs](https://docs.nvidia.com/nemo/microservices/latest/get-started/tutorials/deploy-nims.html) for more information on how to deploy a NIM and verify it's available for inference. + +You can also remove a deployed NIM to free up GPU resources, if needed.
+```sh +export NEMO_URL="http://nemo.test" + +curl -X DELETE "$NEMO_URL/v1/deployment/model-deployments/meta/llama-3.1-8b-instruct" +``` ## Running Llama Stack with NVIDIA -You can do this via Conda (build code) or Docker which has a pre-built image. +You can do this via Conda or venv (build code), or Docker which has a pre-built image. ### Via Docker @@ -46,7 +116,7 @@ docker run \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ./run.yaml:/root/my-run.yaml \ llamastack/distribution-{{ name }} \ - --yaml-config /root/my-run.yaml \ + --config /root/my-run.yaml \ --port $LLAMA_STACK_PORT \ --env NVIDIA_API_KEY=$NVIDIA_API_KEY ``` @@ -54,9 +124,23 @@ docker run \ ### Via Conda ```bash +INFERENCE_MODEL=meta-llama/Llama-3.1-8b-Instruct llama stack build --template nvidia --image-type conda llama stack run ./run.yaml \ --port 8321 \ - --env NVIDIA_API_KEY=$NVIDIA_API_KEY + --env NVIDIA_API_KEY=$NVIDIA_API_KEY \ + --env INFERENCE_MODEL=$INFERENCE_MODEL +``` + +### Via venv + +If you've set up your local development environment, you can also build the image using your local virtual environment. + +```bash +INFERENCE_MODEL=meta-llama/Llama-3.1-8b-Instruct +llama stack build --template nvidia --image-type venv +llama stack run ./run.yaml \ + --port 8321 \ + --env NVIDIA_API_KEY=$NVIDIA_API_KEY \ --env INFERENCE_MODEL=$INFERENCE_MODEL ``` diff --git a/llama_stack/templates/nvidia/nvidia.py b/llama_stack/templates/nvidia/nvidia.py index 3b0cbe1e5..bfd004037 100644 --- a/llama_stack/templates/nvidia/nvidia.py +++ b/llama_stack/templates/nvidia/nvidia.py @@ -7,6 +7,8 @@ from pathlib import Path from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput, ToolGroupInput +from llama_stack.providers.remote.datasetio.nvidia import NvidiaDatasetIOConfig +from llama_stack.providers.remote.eval.nvidia import NVIDIAEvalConfig from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig from llama_stack.providers.remote.inference.nvidia.models import MODEL_ENTRIES from llama_stack.providers.remote.safety.nvidia import NVIDIASafetyConfig @@ -20,9 +22,9 @@ def get_distribution_template() -> DistributionTemplate: "safety": ["remote::nvidia"], "agents": ["inline::meta-reference"], "telemetry": ["inline::meta-reference"], - "eval": ["inline::meta-reference"], + "eval": ["remote::nvidia"], "post_training": ["remote::nvidia"], - "datasetio": ["inline::localfs"], + "datasetio": ["inline::localfs", "remote::nvidia"], "scoring": ["inline::basic"], "tool_runtime": ["inline::rag-runtime"], } @@ -37,6 +39,16 @@ def get_distribution_template() -> DistributionTemplate: provider_type="remote::nvidia", config=NVIDIASafetyConfig.sample_run_config(), ) + datasetio_provider = Provider( + provider_id="nvidia", + provider_type="remote::nvidia", + config=NvidiaDatasetIOConfig.sample_run_config(), + ) + eval_provider = Provider( + provider_id="nvidia", + provider_type="remote::nvidia", + config=NVIDIAEvalConfig.sample_run_config(), + ) inference_model = ModelInput( model_id="${env.INFERENCE_MODEL}", provider_id="nvidia", @@ -59,8 +71,8 @@ def get_distribution_template() -> DistributionTemplate: default_models = get_model_registry(available_models) return DistributionTemplate( name="nvidia", - distro_type="remote_hosted", - description="Use NVIDIA NIM for running LLM inference and safety", + distro_type="self_hosted", + description="Use NVIDIA NIM for running LLM inference, evaluation and safety", container_image=None, template_path=Path(__file__).parent / "doc_template.md", providers=providers, @@ -69,6 
+81,8 @@ def get_distribution_template() -> DistributionTemplate: "run.yaml": RunConfigSettings( provider_overrides={ "inference": [inference_provider], + "datasetio": [datasetio_provider], + "eval": [eval_provider], }, default_models=default_models, default_tool_groups=default_tool_groups, @@ -78,7 +92,8 @@ def get_distribution_template() -> DistributionTemplate: "inference": [ inference_provider, safety_provider, - ] + ], + "eval": [eval_provider], }, default_models=[inference_model, safety_model], default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}", provider_id="nvidia")], @@ -90,19 +105,15 @@ def get_distribution_template() -> DistributionTemplate: "", "NVIDIA API Key", ), - ## Nemo Customizer related variables - "NVIDIA_USER_ID": ( - "llama-stack-user", - "NVIDIA User ID", + "NVIDIA_APPEND_API_VERSION": ( + "True", + "Whether to append the API version to the base_url", ), + ## Nemo Customizer related variables "NVIDIA_DATASET_NAMESPACE": ( "default", "NVIDIA Dataset Namespace", ), - "NVIDIA_ACCESS_POLICIES": ( - "{}", - "NVIDIA Access Policies", - ), "NVIDIA_PROJECT_ID": ( "test-project", "NVIDIA Project ID", @@ -119,6 +130,10 @@ def get_distribution_template() -> DistributionTemplate: "http://0.0.0.0:7331", "URL for the NeMo Guardrails Service", ), + "NVIDIA_EVALUATOR_URL": ( + "http://0.0.0.0:7331", + "URL for the NeMo Evaluator Service", + ), "INFERENCE_MODEL": ( "Llama3.1-8B-Instruct", "Inference model", diff --git a/llama_stack/templates/nvidia/run-with-safety.yaml b/llama_stack/templates/nvidia/run-with-safety.yaml index 658d9377e..c431e12f2 100644 --- a/llama_stack/templates/nvidia/run-with-safety.yaml +++ b/llama_stack/templates/nvidia/run-with-safety.yaml @@ -18,6 +18,7 @@ providers: config: url: ${env.NVIDIA_BASE_URL:https://integrate.api.nvidia.com} api_key: ${env.NVIDIA_API_KEY:} + append_api_version: ${env.NVIDIA_APPEND_API_VERSION:True} - provider_id: nvidia provider_type: remote::nvidia config: @@ -45,21 +46,21 @@ providers: type: sqlite namespace: null db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/nvidia/trace_store.db} + sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/trace_store.db eval: - - provider_id: meta-reference - provider_type: inline::meta-reference + - provider_id: nvidia + provider_type: remote::nvidia config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/meta_reference_eval.db + evaluator_url: ${env.NVIDIA_EVALUATOR_URL:http://localhost:7331} post_training: - provider_id: nvidia provider_type: remote::nvidia @@ -76,6 +77,13 @@ providers: type: sqlite namespace: null db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/localfs_datasetio.db + - provider_id: nvidia + provider_type: remote::nvidia + config: + api_key: ${env.NVIDIA_API_KEY:} + dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:default} + project_id: ${env.NVIDIA_PROJECT_ID:test-project} + datasets_url: ${env.NVIDIA_DATASETS_URL:http://nemo.test} scoring: - provider_id: basic provider_type: inline::basic @@ -87,6 +95,9 @@ providers: metadata_store: type: sqlite 
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/nvidia/run.yaml b/llama_stack/templates/nvidia/run.yaml index 1267a9883..5b244081d 100644 --- a/llama_stack/templates/nvidia/run.yaml +++ b/llama_stack/templates/nvidia/run.yaml @@ -18,6 +18,7 @@ providers: config: url: ${env.NVIDIA_BASE_URL:https://integrate.api.nvidia.com} api_key: ${env.NVIDIA_API_KEY:} + append_api_version: ${env.NVIDIA_APPEND_API_VERSION:True} vector_io: - provider_id: faiss provider_type: inline::faiss @@ -40,21 +41,21 @@ providers: type: sqlite namespace: null db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/nvidia/trace_store.db} + sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/trace_store.db eval: - - provider_id: meta-reference - provider_type: inline::meta-reference + - provider_id: nvidia + provider_type: remote::nvidia config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/meta_reference_eval.db + evaluator_url: ${env.NVIDIA_EVALUATOR_URL:http://localhost:7331} post_training: - provider_id: nvidia provider_type: remote::nvidia @@ -64,13 +65,13 @@ providers: project_id: ${env.NVIDIA_PROJECT_ID:test-project} customizer_url: ${env.NVIDIA_CUSTOMIZER_URL:http://nemo.test} datasetio: - - provider_id: localfs - provider_type: inline::localfs + - provider_id: nvidia + provider_type: remote::nvidia config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/localfs_datasetio.db + api_key: ${env.NVIDIA_API_KEY:} + dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:default} + project_id: ${env.NVIDIA_PROJECT_ID:test-project} + datasets_url: ${env.NVIDIA_DATASETS_URL:http://nemo.test} scoring: - provider_id: basic provider_type: inline::basic @@ -82,6 +83,9 @@ providers: metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/inference_store.db models: - metadata: {} model_id: meta/llama3-8b-instruct @@ -173,6 +177,16 @@ models: provider_id: nvidia provider_model_id: meta/llama-3.2-90b-vision-instruct model_type: llm +- metadata: {} + model_id: meta/llama-3.3-70b-instruct + provider_id: nvidia + provider_model_id: meta/llama-3.3-70b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.3-70B-Instruct + provider_id: nvidia + provider_model_id: meta/llama-3.3-70b-instruct + model_type: llm - metadata: embedding_dimension: 2048 context_length: 8192 diff --git a/llama_stack/templates/ollama/build.yaml b/llama_stack/templates/ollama/build.yaml index 37b72fc1f..36a120897 100644 --- a/llama_stack/templates/ollama/build.yaml +++ b/llama_stack/templates/ollama/build.yaml @@ -23,11 +23,15 @@ distribution_spec: - inline::basic - inline::llm-as-judge - inline::braintrust + 
post_training: + - inline::huggingface tool_runtime: - remote::brave-search - remote::tavily-search - - inline::code-interpreter - inline::rag-runtime - remote::model-context-protocol - remote::wolfram-alpha image_type: conda +additional_pip_packages: +- aiosqlite +- sqlalchemy[asyncio] diff --git a/llama_stack/templates/ollama/doc_template.md b/llama_stack/templates/ollama/doc_template.md index f961ab7ed..aaa65bab2 100644 --- a/llama_stack/templates/ollama/doc_template.md +++ b/llama_stack/templates/ollama/doc_template.md @@ -86,7 +86,7 @@ docker run \ -v ~/.llama:/root/.llama \ -v ./llama_stack/templates/ollama/run-with-safety.yaml:/root/my-run.yaml \ llamastack/distribution-{{ name }} \ - --yaml-config /root/my-run.yaml \ + --config /root/my-run.yaml \ --port $LLAMA_STACK_PORT \ --env INFERENCE_MODEL=$INFERENCE_MODEL \ --env SAFETY_MODEL=$SAFETY_MODEL \ diff --git a/llama_stack/templates/ollama/ollama.py b/llama_stack/templates/ollama/ollama.py index d9f0960a2..0b4f05128 100644 --- a/llama_stack/templates/ollama/ollama.py +++ b/llama_stack/templates/ollama/ollama.py @@ -13,6 +13,7 @@ from llama_stack.distribution.datatypes import ( ShieldInput, ToolGroupInput, ) +from llama_stack.providers.inline.post_training.huggingface import HuggingFacePostTrainingConfig from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig from llama_stack.providers.remote.inference.ollama import OllamaImplConfig from llama_stack.templates.template import DistributionTemplate, RunConfigSettings @@ -28,10 +29,10 @@ def get_distribution_template() -> DistributionTemplate: "eval": ["inline::meta-reference"], "datasetio": ["remote::huggingface", "inline::localfs"], "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"], + "post_training": ["inline::huggingface"], "tool_runtime": [ "remote::brave-search", "remote::tavily-search", - "inline::code-interpreter", "inline::rag-runtime", "remote::model-context-protocol", "remote::wolfram-alpha", @@ -48,7 +49,11 @@ def get_distribution_template() -> DistributionTemplate: provider_type="inline::faiss", config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), ) - + posttraining_provider = Provider( + provider_id="huggingface", + provider_type="inline::huggingface", + config=HuggingFacePostTrainingConfig.sample_run_config(f"~/.llama/distributions/{name}"), + ) inference_model = ModelInput( model_id="${env.INFERENCE_MODEL}", provider_id="ollama", @@ -75,10 +80,6 @@ def get_distribution_template() -> DistributionTemplate: toolgroup_id="builtin::rag", provider_id="rag-runtime", ), - ToolGroupInput( - toolgroup_id="builtin::code_interpreter", - provider_id="code-interpreter", - ), ToolGroupInput( toolgroup_id="builtin::wolfram_alpha", provider_id="wolfram-alpha", @@ -97,6 +98,7 @@ def get_distribution_template() -> DistributionTemplate: provider_overrides={ "inference": [inference_provider], "vector_io": [vector_io_provider_faiss], + "post_training": [posttraining_provider], }, default_models=[inference_model, embedding_model], default_tool_groups=default_tool_groups, @@ -105,6 +107,7 @@ def get_distribution_template() -> DistributionTemplate: provider_overrides={ "inference": [inference_provider], "vector_io": [vector_io_provider_faiss], + "post_training": [posttraining_provider], "safety": [ Provider( provider_id="llama-guard", diff --git a/llama_stack/templates/ollama/report.md b/llama_stack/templates/ollama/report.md deleted file mode 100644 index 724809a59..000000000 --- 
a/llama_stack/templates/ollama/report.md +++ /dev/null @@ -1,44 +0,0 @@ -# Report for ollama distribution - -## Supported Models -| Model Descriptor | ollama | -|:---|:---| -| Llama-3-8B-Instruct | ❌ | -| Llama-3-70B-Instruct | ❌ | -| Llama3.1-8B-Instruct | ✅ | -| Llama3.1-70B-Instruct | ✅ | -| Llama3.1-405B-Instruct | ✅ | -| Llama3.2-1B-Instruct | ✅ | -| Llama3.2-3B-Instruct | ✅ | -| Llama3.2-11B-Vision-Instruct | ✅ | -| Llama3.2-90B-Vision-Instruct | ✅ | -| Llama3.3-70B-Instruct | ✅ | -| Llama-Guard-3-11B-Vision | ❌ | -| Llama-Guard-3-1B | ✅ | -| Llama-Guard-3-8B | ✅ | -| Llama-Guard-2-8B | ❌ | - -## Inference -| Model | API | Capability | Test | Status | -|:----- |:-----|:-----|:-----|:-----| -| Llama-3.1-8B-Instruct | /chat_completion | streaming | test_text_chat_completion_streaming | ✅ | -| Llama-3.2-11B-Vision-Instruct | /chat_completion | streaming | test_image_chat_completion_streaming | ❌ | -| Llama-3.2-11B-Vision-Instruct | /chat_completion | non_streaming | test_image_chat_completion_non_streaming | ❌ | -| Llama-3.1-8B-Instruct | /chat_completion | non_streaming | test_text_chat_completion_non_streaming | ✅ | -| Llama-3.1-8B-Instruct | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_streaming | ✅ | -| Llama-3.1-8B-Instruct | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_non_streaming | ✅ | -| Llama-3.1-8B-Instruct | /completion | streaming | test_text_completion_streaming | ✅ | -| Llama-3.1-8B-Instruct | /completion | non_streaming | test_text_completion_non_streaming | ✅ | -| Llama-3.1-8B-Instruct | /completion | structured_output | test_text_completion_structured_output | ✅ | - -## Vector IO -| API | Capability | Test | Status | -|:-----|:-----|:-----|:-----| -| /retrieve | | test_vector_db_retrieve | ✅ | - -## Agents -| API | Capability | Test | Status | -|:-----|:-----|:-----|:-----| -| /create_agent_turn | rag | test_rag_agent | ✅ | -| /create_agent_turn | custom_tool | test_custom_tool | ✅ | -| /create_agent_turn | code_execution | test_code_interpreter_for_attachments | ✅ | diff --git a/llama_stack/templates/ollama/run-with-safety.yaml b/llama_stack/templates/ollama/run-with-safety.yaml index b43fec6db..d63c5e366 100644 --- a/llama_stack/templates/ollama/run-with-safety.yaml +++ b/llama_stack/templates/ollama/run-with-safety.yaml @@ -5,6 +5,7 @@ apis: - datasetio - eval - inference +- post_training - safety - scoring - telemetry @@ -39,13 +40,16 @@ providers: type: sqlite namespace: null db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/ollama/trace_store.db} + sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -80,6 +84,13 @@ providers: provider_type: inline::braintrust config: openai_api_key: ${env.OPENAI_API_KEY:} + post_training: + - provider_id: huggingface + provider_type: inline::huggingface + config: + checkpoint_format: huggingface + distributed_backend: null + device: cpu tool_runtime: - provider_id: brave-search provider_type: remote::brave-search @@ -91,9 
+102,6 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:} max_results: 3 - - provider_id: code-interpreter - provider_type: inline::code-interpreter - config: {} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} @@ -107,6 +115,9 @@ providers: metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} @@ -136,8 +147,6 @@ tool_groups: provider_id: tavily-search - toolgroup_id: builtin::rag provider_id: rag-runtime -- toolgroup_id: builtin::code_interpreter - provider_id: code-interpreter - toolgroup_id: builtin::wolfram_alpha provider_id: wolfram-alpha server: diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml index c8f4ad9ad..d208cd7f0 100644 --- a/llama_stack/templates/ollama/run.yaml +++ b/llama_stack/templates/ollama/run.yaml @@ -5,6 +5,7 @@ apis: - datasetio - eval - inference +- post_training - safety - scoring - telemetry @@ -37,13 +38,16 @@ providers: type: sqlite namespace: null db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/ollama/trace_store.db} + sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -78,6 +82,13 @@ providers: provider_type: inline::braintrust config: openai_api_key: ${env.OPENAI_API_KEY:} + post_training: + - provider_id: huggingface + provider_type: inline::huggingface + config: + checkpoint_format: huggingface + distributed_backend: null + device: cpu tool_runtime: - provider_id: brave-search provider_type: remote::brave-search @@ -89,9 +100,6 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:} max_results: 3 - - provider_id: code-interpreter - provider_type: inline::code-interpreter - config: {} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} @@ -105,6 +113,9 @@ providers: metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} @@ -126,8 +137,6 @@ tool_groups: provider_id: tavily-search - toolgroup_id: builtin::rag provider_id: rag-runtime -- toolgroup_id: builtin::code_interpreter - provider_id: code-interpreter - toolgroup_id: builtin::wolfram_alpha provider_id: wolfram-alpha server: diff --git a/llama_stack/templates/open-benchmark/build.yaml b/llama_stack/templates/open-benchmark/build.yaml index 1db90ef27..840f1e1db 100644 --- a/llama_stack/templates/open-benchmark/build.yaml +++ b/llama_stack/templates/open-benchmark/build.yaml @@ -30,7 +30,9 @@ distribution_spec: tool_runtime: - remote::brave-search - remote::tavily-search - - inline::code-interpreter - inline::rag-runtime - remote::model-context-protocol image_type: conda +additional_pip_packages: +- aiosqlite +- sqlalchemy[asyncio] 
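Across these run configs the trace store moved from the old `SQLITE_DB_PATH` variable to a path under `SQLITE_STORE_DIR`, which also anchors the new `responses_store` and `inference_store` databases. A sketch of overriding it for a local run (the directory is an example; with Docker or Conda, pass the variable via `--env` as the other templates do):

```sh
# Example directory only: every ${env.SQLITE_STORE_DIR:...} path in run.yaml
# then resolves under it (agents, responses, traces, registry, inference).
export SQLITE_STORE_DIR=/data/llama-stack/open-benchmark
llama stack run ./run.yaml --port 8321
```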
diff --git a/llama_stack/templates/open-benchmark/open_benchmark.py b/llama_stack/templates/open-benchmark/open_benchmark.py index a6a906c6f..d944d4eff 100644 --- a/llama_stack/templates/open-benchmark/open_benchmark.py +++ b/llama_stack/templates/open-benchmark/open_benchmark.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Dict, List, Tuple from llama_stack.apis.datasets import DatasetPurpose, URIDataSource from llama_stack.apis.models.models import ModelType @@ -36,7 +35,7 @@ from llama_stack.templates.template import ( ) -def get_inference_providers() -> Tuple[List[Provider], Dict[str, List[ProviderModelEntry]]]: +def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderModelEntry]]]: # in this template, we allow each API key to be optional providers = [ ( @@ -108,7 +107,6 @@ def get_distribution_template() -> DistributionTemplate: "tool_runtime": [ "remote::brave-search", "remote::tavily-search", - "inline::code-interpreter", "inline::rag-runtime", "remote::model-context-protocol", ], @@ -146,10 +144,6 @@ def get_distribution_template() -> DistributionTemplate: toolgroup_id="builtin::rag", provider_id="rag-runtime", ), - ToolGroupInput( - toolgroup_id="builtin::code_interpreter", - provider_id="code-interpreter", - ), ] default_models = get_model_registry(available_models) + [ diff --git a/llama_stack/templates/open-benchmark/run.yaml b/llama_stack/templates/open-benchmark/run.yaml index 5e908b081..0e5edf728 100644 --- a/llama_stack/templates/open-benchmark/run.yaml +++ b/llama_stack/templates/open-benchmark/run.yaml @@ -64,13 +64,16 @@ providers: type: sqlite namespace: null db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/open-benchmark/trace_store.db} + sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -116,9 +119,6 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:} max_results: 3 - - provider_id: code-interpreter - provider_type: inline::code-interpreter - config: {} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} @@ -128,6 +128,9 @@ providers: metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/inference_store.db models: - metadata: {} model_id: openai/gpt-4o @@ -242,7 +245,5 @@ tool_groups: provider_id: tavily-search - toolgroup_id: builtin::rag provider_id: rag-runtime -- toolgroup_id: builtin::code_interpreter - provider_id: code-interpreter server: port: 8321 diff --git a/llama_stack/templates/passthrough/build.yaml b/llama_stack/templates/passthrough/build.yaml index fb1fb1066..46b99cb75 100644 --- a/llama_stack/templates/passthrough/build.yaml +++ b/llama_stack/templates/passthrough/build.yaml @@ -28,7 +28,9 @@ distribution_spec: - remote::brave-search - remote::tavily-search - 
remote::wolfram-alpha - - inline::code-interpreter - inline::rag-runtime - remote::model-context-protocol image_type: conda +additional_pip_packages: +- aiosqlite +- sqlalchemy[asyncio] diff --git a/llama_stack/templates/passthrough/passthrough.py b/llama_stack/templates/passthrough/passthrough.py index 8454e49cf..6a30625c5 100644 --- a/llama_stack/templates/passthrough/passthrough.py +++ b/llama_stack/templates/passthrough/passthrough.py @@ -38,7 +38,6 @@ def get_distribution_template() -> DistributionTemplate: "remote::brave-search", "remote::tavily-search", "remote::wolfram-alpha", - "inline::code-interpreter", "inline::rag-runtime", "remote::model-context-protocol", ], @@ -100,10 +99,6 @@ def get_distribution_template() -> DistributionTemplate: toolgroup_id="builtin::rag", provider_id="rag-runtime", ), - ToolGroupInput( - toolgroup_id="builtin::code_interpreter", - provider_id="code-interpreter", - ), ] return DistributionTemplate( diff --git a/llama_stack/templates/passthrough/run-with-safety.yaml b/llama_stack/templates/passthrough/run-with-safety.yaml index 8ab6b1081..bbf5d9a52 100644 --- a/llama_stack/templates/passthrough/run-with-safety.yaml +++ b/llama_stack/templates/passthrough/run-with-safety.yaml @@ -46,13 +46,16 @@ providers: type: sqlite namespace: null db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/passthrough/trace_store.db} + sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -102,9 +105,6 @@ providers: provider_type: remote::wolfram-alpha config: api_key: ${env.WOLFRAM_ALPHA_API_KEY:} - - provider_id: code-interpreter - provider_type: inline::code-interpreter - config: {} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} @@ -114,6 +114,9 @@ providers: metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/inference_store.db models: - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct @@ -148,7 +151,5 @@ tool_groups: provider_id: wolfram-alpha - toolgroup_id: builtin::rag provider_id: rag-runtime -- toolgroup_id: builtin::code_interpreter - provider_id: code-interpreter server: port: 8321 diff --git a/llama_stack/templates/passthrough/run.yaml b/llama_stack/templates/passthrough/run.yaml index 53e8c8857..146906d9b 100644 --- a/llama_stack/templates/passthrough/run.yaml +++ b/llama_stack/templates/passthrough/run.yaml @@ -41,13 +41,16 @@ providers: type: sqlite namespace: null db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: 
${env.SQLITE_DB_PATH:~/.llama/distributions/passthrough/trace_store.db} + sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -97,9 +100,6 @@ providers: provider_type: remote::wolfram-alpha config: api_key: ${env.WOLFRAM_ALPHA_API_KEY:} - - provider_id: code-interpreter - provider_type: inline::code-interpreter - config: {} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} @@ -109,6 +109,9 @@ providers: metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/inference_store.db models: - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct @@ -138,7 +141,5 @@ tool_groups: provider_id: wolfram-alpha - toolgroup_id: builtin::rag provider_id: rag-runtime -- toolgroup_id: builtin::code_interpreter - provider_id: code-interpreter server: port: 8321 diff --git a/llama_stack/templates/postgres-demo/__init__.py b/llama_stack/templates/postgres-demo/__init__.py new file mode 100644 index 000000000..81473cb73 --- /dev/null +++ b/llama_stack/templates/postgres-demo/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from .postgres_demo import get_distribution_template # noqa: F401 diff --git a/llama_stack/templates/postgres-demo/build.yaml b/llama_stack/templates/postgres-demo/build.yaml new file mode 100644 index 000000000..8f3648abe --- /dev/null +++ b/llama_stack/templates/postgres-demo/build.yaml @@ -0,0 +1,24 @@ +version: '2' +distribution_spec: + description: Quick start template for running Llama Stack with several popular providers + providers: + inference: + - remote::fireworks + - remote::vllm + vector_io: + - remote::chromadb + safety: + - inline::llama-guard + agents: + - inline::meta-reference + telemetry: + - inline::meta-reference + tool_runtime: + - remote::brave-search + - remote::tavily-search + - inline::rag-runtime + - remote::model-context-protocol +image_type: conda +additional_pip_packages: +- asyncpg +- sqlalchemy[asyncio] diff --git a/llama_stack/templates/postgres-demo/postgres_demo.py b/llama_stack/templates/postgres-demo/postgres_demo.py new file mode 100644 index 000000000..d2e352320 --- /dev/null +++ b/llama_stack/templates/postgres-demo/postgres_demo.py @@ -0,0 +1,164 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ + +from llama_stack.distribution.datatypes import ( + ModelInput, + Provider, + ShieldInput, + ToolGroupInput, +) +from llama_stack.providers.remote.inference.fireworks.config import FireworksImplConfig +from llama_stack.providers.remote.inference.fireworks.models import ( + MODEL_ENTRIES as FIREWORKS_MODEL_ENTRIES, +) +from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig +from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig +from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry +from llama_stack.providers.utils.kvstore.config import PostgresKVStoreConfig +from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig +from llama_stack.templates.template import ( + DistributionTemplate, + RunConfigSettings, + get_model_registry, +) + + +def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderModelEntry]]]: + # in this template, we allow each API key to be optional + providers = [ + ( + "fireworks", + FIREWORKS_MODEL_ENTRIES, + FireworksImplConfig.sample_run_config(api_key="${env.FIREWORKS_API_KEY:}"), + ), + ] + inference_providers = [] + available_models = {} + for provider_id, model_entries, config in providers: + inference_providers.append( + Provider( + provider_id=provider_id, + provider_type=f"remote::{provider_id}", + config=config, + ) + ) + available_models[provider_id] = model_entries + inference_providers.append( + Provider( + provider_id="vllm-inference", + provider_type="remote::vllm", + config=VLLMInferenceAdapterConfig.sample_run_config( + url="${env.VLLM_URL:http://localhost:8000/v1}", + ), + ) + ) + return inference_providers, available_models + + +def get_distribution_template() -> DistributionTemplate: + inference_providers, available_models = get_inference_providers() + providers = { + "inference": ([p.provider_type for p in inference_providers]), + "vector_io": ["remote::chromadb"], + "safety": ["inline::llama-guard"], + "agents": ["inline::meta-reference"], + "telemetry": ["inline::meta-reference"], + "tool_runtime": [ + "remote::brave-search", + "remote::tavily-search", + "inline::rag-runtime", + "remote::model-context-protocol", + ], + } + name = "postgres-demo" + + vector_io_providers = [ + Provider( + provider_id="${env.ENABLE_CHROMADB+chromadb}", + provider_type="remote::chromadb", + config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:}"), + ), + ] + default_tool_groups = [ + ToolGroupInput( + toolgroup_id="builtin::websearch", + provider_id="tavily-search", + ), + ToolGroupInput( + toolgroup_id="builtin::rag", + provider_id="rag-runtime", + ), + ] + + default_models = get_model_registry(available_models) + default_models.append( + ModelInput( + model_id="${env.INFERENCE_MODEL}", + provider_id="vllm-inference", + ) + ) + postgres_config = { + "type": "postgres", + "host": "${env.POSTGRES_HOST:localhost}", + "port": "${env.POSTGRES_PORT:5432}", + "db": "${env.POSTGRES_DB:llamastack}", + "user": "${env.POSTGRES_USER:llamastack}", + "password": "${env.POSTGRES_PASSWORD:llamastack}", + } + + return DistributionTemplate( + name=name, + distro_type="self_hosted", + description="Quick start template for running Llama Stack with several popular providers", + container_image=None, + template_path=None, + providers=providers, + available_models_by_provider=available_models, + run_configs={ + "run.yaml": RunConfigSettings( + provider_overrides={ + "inference": inference_providers, + "vector_io": vector_io_providers, + 
"agents": [ + Provider( + provider_id="meta-reference", + provider_type="inline::meta-reference", + config=dict( + persistence_store=postgres_config, + responses_store=postgres_config, + ), + ) + ], + "telemetry": [ + Provider( + provider_id="meta-reference", + provider_type="inline::meta-reference", + config=dict( + service_name="${env.OTEL_SERVICE_NAME:}", + sinks="${env.TELEMETRY_SINKS:console}", + ), + ) + ], + }, + default_models=default_models, + default_tool_groups=default_tool_groups, + default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")], + metadata_store=PostgresKVStoreConfig.model_validate(postgres_config), + inference_store=PostgresSqlStoreConfig.model_validate(postgres_config), + ), + }, + run_config_env_vars={ + "LLAMA_STACK_PORT": ( + "8321", + "Port for the Llama Stack distribution server", + ), + "FIREWORKS_API_KEY": ( + "", + "Fireworks API Key", + ), + }, + ) diff --git a/llama_stack/templates/postgres-demo/run.yaml b/llama_stack/templates/postgres-demo/run.yaml new file mode 100644 index 000000000..889b8eaa7 --- /dev/null +++ b/llama_stack/templates/postgres-demo/run.yaml @@ -0,0 +1,224 @@ +version: '2' +image_name: postgres-demo +apis: +- agents +- inference +- safety +- telemetry +- tool_runtime +- vector_io +providers: + inference: + - provider_id: fireworks + provider_type: remote::fireworks + config: + url: https://api.fireworks.ai/inference/v1 + api_key: ${env.FIREWORKS_API_KEY:} + - provider_id: vllm-inference + provider_type: remote::vllm + config: + url: ${env.VLLM_URL:http://localhost:8000/v1} + max_tokens: ${env.VLLM_MAX_TOKENS:4096} + api_token: ${env.VLLM_API_TOKEN:fake} + tls_verify: ${env.VLLM_TLS_VERIFY:true} + vector_io: + - provider_id: ${env.ENABLE_CHROMADB+chromadb} + provider_type: remote::chromadb + config: + url: ${env.CHROMADB_URL:} + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: postgres + host: ${env.POSTGRES_HOST:localhost} + port: ${env.POSTGRES_PORT:5432} + db: ${env.POSTGRES_DB:llamastack} + user: ${env.POSTGRES_USER:llamastack} + password: ${env.POSTGRES_PASSWORD:llamastack} + responses_store: + type: postgres + host: ${env.POSTGRES_HOST:localhost} + port: ${env.POSTGRES_PORT:5432} + db: ${env.POSTGRES_DB:llamastack} + user: ${env.POSTGRES_USER:llamastack} + password: ${env.POSTGRES_PASSWORD:llamastack} + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + service_name: ${env.OTEL_SERVICE_NAME:} + sinks: ${env.TELEMETRY_SINKS:console} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:} + max_results: 3 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} +metadata_store: + type: postgres + host: ${env.POSTGRES_HOST:localhost} + port: ${env.POSTGRES_PORT:5432} + db: ${env.POSTGRES_DB:llamastack} + user: ${env.POSTGRES_USER:llamastack} + password: ${env.POSTGRES_PASSWORD:llamastack} + table_name: llamastack_kvstore +inference_store: + type: postgres + host: ${env.POSTGRES_HOST:localhost} + port: ${env.POSTGRES_PORT:5432} + db: ${env.POSTGRES_DB:llamastack} + 
user: ${env.POSTGRES_USER:llamastack} + password: ${env.POSTGRES_PASSWORD:llamastack} +models: +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-8b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.1-8B-Instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-70b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.1-70B-Instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-405b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-3b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-3B-Instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-11B-Vision-Instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-90B-Vision-Instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p3-70b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.3-70B-Instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-guard-3-8b + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-8b + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-Guard-3-8B + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-8b + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-guard-3-11b-vision + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-Guard-3-11B-Vision + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama4-scout-instruct-basic + provider_id: fireworks + 
provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama4-maverick-instruct-basic + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic + model_type: llm +- metadata: + embedding_dimension: 768 + context_length: 8192 + model_id: nomic-ai/nomic-embed-text-v1.5 + provider_id: fireworks + provider_model_id: nomic-ai/nomic-embed-text-v1.5 + model_type: embedding +- metadata: {} + model_id: ${env.INFERENCE_MODEL} + provider_id: vllm-inference + model_type: llm +shields: +- shield_id: meta-llama/Llama-Guard-3-8B +vector_dbs: [] +datasets: [] +scoring_fns: [] +benchmarks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: tavily-search +- toolgroup_id: builtin::rag + provider_id: rag-runtime +server: + port: 8321 diff --git a/llama_stack/templates/remote-vllm/build.yaml b/llama_stack/templates/remote-vllm/build.yaml index b2bbf853a..16fe5d4fd 100644 --- a/llama_stack/templates/remote-vllm/build.yaml +++ b/llama_stack/templates/remote-vllm/build.yaml @@ -27,8 +27,10 @@ distribution_spec: tool_runtime: - remote::brave-search - remote::tavily-search - - inline::code-interpreter - inline::rag-runtime - remote::model-context-protocol - remote::wolfram-alpha image_type: conda +additional_pip_packages: +- aiosqlite +- sqlalchemy[asyncio] diff --git a/llama_stack/templates/remote-vllm/doc_template.md b/llama_stack/templates/remote-vllm/doc_template.md index efcdb62c6..5684888da 100644 --- a/llama_stack/templates/remote-vllm/doc_template.md +++ b/llama_stack/templates/remote-vllm/doc_template.md @@ -28,10 +28,10 @@ The following environment variables can be configured: ## Setting up vLLM server -In the following sections, we'll use either AMD and NVIDIA GPUs to serve as hardware accelerators for the vLLM +In the following sections, we'll use AMD, NVIDIA or Intel GPUs to serve as hardware accelerators for the vLLM server, which acts as both the LLM inference provider and the safety provider. Note that vLLM also [supports many other hardware accelerators](https://docs.vllm.ai/en/latest/getting_started/installation.html) and -that we only use GPUs here for demonstration purposes. +that we only use GPUs here for demonstration purposes. Note that if you run into issues, you can include the environment variable `--env VLLM_DEBUG_LOG_API_SERVER_RESPONSE=true` (available in vLLM v0.8.3 and above) in the `docker run` command to enable log response from API server for debugging. ### Setting up vLLM server on AMD GPU @@ -149,6 +149,55 @@ docker run \ --port $SAFETY_PORT ``` +### Setting up vLLM server on Intel GPU + +Refer to [vLLM Documentation for XPU](https://docs.vllm.ai/en/v0.8.2/getting_started/installation/gpu.html?device=xpu) to get a vLLM endpoint. 
In addition to the vLLM-side setup, which walks through installing vLLM from source or building your own vLLM Docker container, Intel provides a prebuilt vLLM container for systems with Intel GPUs supported by the PyTorch XPU backend: +- [intel/vllm](https://hub.docker.com/r/intel/vllm) + +Here is a sample script to start a vLLM server locally via Docker using the Intel-provided container: + +```bash +export INFERENCE_PORT=8000 +export INFERENCE_MODEL=meta-llama/Llama-3.2-1B-Instruct +export ZE_AFFINITY_MASK=0 + +docker run \ + --pull always \ + --device /dev/dri \ + -v /dev/dri/by-path:/dev/dri/by-path \ + -v ~/.cache/huggingface:/root/.cache/huggingface \ + --env "HUGGING_FACE_HUB_TOKEN=$HF_TOKEN" \ + --env ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK \ + -p $INFERENCE_PORT:$INFERENCE_PORT \ + --ipc=host \ + intel/vllm:xpu \ + --gpu-memory-utilization 0.7 \ + --model $INFERENCE_MODEL \ + --port $INFERENCE_PORT +``` + +If you are using Llama Stack Safety / Shield APIs, you will also need to run another vLLM instance with a corresponding safety model such as `meta-llama/Llama-Guard-3-1B`, using a script like: + +```bash +export SAFETY_PORT=8081 +export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B +export ZE_AFFINITY_MASK=1 + +docker run \ + --pull always \ + --device /dev/dri \ + -v /dev/dri/by-path:/dev/dri/by-path \ + -v ~/.cache/huggingface:/root/.cache/huggingface \ + --env "HUGGING_FACE_HUB_TOKEN=$HF_TOKEN" \ + --env ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK \ + -p $SAFETY_PORT:$SAFETY_PORT \ + --ipc=host \ + intel/vllm:xpu \ + --gpu-memory-utilization 0.7 \ + --model $SAFETY_MODEL \ + --port $SAFETY_PORT +``` + ## Running Llama Stack  Now you are ready to run Llama Stack with vLLM as the inference provider. You can do this via Conda (build code) or Docker which has a pre-built image. 
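Before pointing Llama Stack at these endpoints, it can be useful to confirm that both vLLM servers are up and serving the expected models. The snippet below is a minimal sketch of such a check, assuming the `requests` package is installed and that the ports match the `INFERENCE_PORT`/`SAFETY_PORT` values exported above; it is not part of the template itself.

```python
# Minimal sketch: confirm the inference and safety vLLM endpoints respond on
# their OpenAI-compatible /v1/models route before starting Llama Stack.
# Assumes `requests` is installed and the ports match the exports above.
import os

import requests

endpoints = {
    "inference": f"http://localhost:{os.environ.get('INFERENCE_PORT', '8000')}/v1/models",
    "safety": f"http://localhost:{os.environ.get('SAFETY_PORT', '8081')}/v1/models",
}

for name, url in endpoints.items():
    try:
        resp = requests.get(url, timeout=5)
        resp.raise_for_status()
        served = [entry["id"] for entry in resp.json().get("data", [])]
        print(f"{name}: serving {served}")
    except requests.RequestException as exc:
        print(f"{name}: not reachable at {url} ({exc})")
```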
@@ -171,7 +220,7 @@ docker run \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ./llama_stack/templates/remote-vllm/run.yaml:/root/my-run.yaml \ llamastack/distribution-{{ name }} \ - --yaml-config /root/my-run.yaml \ + --config /root/my-run.yaml \ --port $LLAMA_STACK_PORT \ --env INFERENCE_MODEL=$INFERENCE_MODEL \ --env VLLM_URL=http://host.docker.internal:$INFERENCE_PORT/v1 @@ -193,7 +242,7 @@ docker run \ -v ~/.llama:/root/.llama \ -v ./llama_stack/templates/remote-vllm/run-with-safety.yaml:/root/my-run.yaml \ llamastack/distribution-{{ name }} \ - --yaml-config /root/my-run.yaml \ + --config /root/my-run.yaml \ --port $LLAMA_STACK_PORT \ --env INFERENCE_MODEL=$INFERENCE_MODEL \ --env VLLM_URL=http://host.docker.internal:$INFERENCE_PORT/v1 \ diff --git a/llama_stack/templates/remote-vllm/run-with-safety.yaml b/llama_stack/templates/remote-vllm/run-with-safety.yaml index bb69496aa..e83162a4f 100644 --- a/llama_stack/templates/remote-vllm/run-with-safety.yaml +++ b/llama_stack/templates/remote-vllm/run-with-safety.yaml @@ -50,6 +50,9 @@ providers: type: sqlite namespace: null db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/responses_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -88,9 +91,9 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/remote-vllm/trace_store.db} + sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/trace_store.db tool_runtime: - provider_id: brave-search provider_type: remote::brave-search @@ -102,9 +105,6 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:} max_results: 3 - - provider_id: code-interpreter - provider_type: inline::code-interpreter - config: {} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} @@ -118,6 +118,9 @@ providers: metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} @@ -143,8 +146,6 @@ tool_groups: provider_id: tavily-search - toolgroup_id: builtin::rag provider_id: rag-runtime -- toolgroup_id: builtin::code_interpreter - provider_id: code-interpreter - toolgroup_id: builtin::wolfram_alpha provider_id: wolfram-alpha server: diff --git a/llama_stack/templates/remote-vllm/run.yaml b/llama_stack/templates/remote-vllm/run.yaml index 14f2da37e..4cdf88c6b 100644 --- a/llama_stack/templates/remote-vllm/run.yaml +++ b/llama_stack/templates/remote-vllm/run.yaml @@ -43,6 +43,9 @@ providers: type: sqlite namespace: null db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/responses_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -81,9 +84,9 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: 
${env.SQLITE_DB_PATH:~/.llama/distributions/remote-vllm/trace_store.db} + sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/trace_store.db tool_runtime: - provider_id: brave-search provider_type: remote::brave-search @@ -95,9 +98,6 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:} max_results: 3 - - provider_id: code-interpreter - provider_type: inline::code-interpreter - config: {} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} @@ -111,6 +111,9 @@ providers: metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} @@ -131,8 +134,6 @@ tool_groups: provider_id: tavily-search - toolgroup_id: builtin::rag provider_id: rag-runtime -- toolgroup_id: builtin::code_interpreter - provider_id: code-interpreter - toolgroup_id: builtin::wolfram_alpha provider_id: wolfram-alpha server: diff --git a/llama_stack/templates/remote-vllm/vllm.py b/llama_stack/templates/remote-vllm/vllm.py index 0f6c7659e..2782a3ea0 100644 --- a/llama_stack/templates/remote-vllm/vllm.py +++ b/llama_stack/templates/remote-vllm/vllm.py @@ -34,7 +34,6 @@ def get_distribution_template() -> DistributionTemplate: "tool_runtime": [ "remote::brave-search", "remote::tavily-search", - "inline::code-interpreter", "inline::rag-runtime", "remote::model-context-protocol", "remote::wolfram-alpha", @@ -84,10 +83,6 @@ def get_distribution_template() -> DistributionTemplate: toolgroup_id="builtin::rag", provider_id="rag-runtime", ), - ToolGroupInput( - toolgroup_id="builtin::code_interpreter", - provider_id="code-interpreter", - ), ToolGroupInput( toolgroup_id="builtin::wolfram_alpha", provider_id="wolfram-alpha", diff --git a/llama_stack/templates/sambanova/build.yaml b/llama_stack/templates/sambanova/build.yaml index ca5ffe618..14b1c8974 100644 --- a/llama_stack/templates/sambanova/build.yaml +++ b/llama_stack/templates/sambanova/build.yaml @@ -1,15 +1,16 @@ version: '2' distribution_spec: - description: Use SambaNova.AI for running LLM inference + description: Use SambaNova for running LLM inference and safety providers: inference: - remote::sambanova + - inline::sentence-transformers vector_io: - inline::faiss - remote::chromadb - remote::pgvector safety: - - inline::llama-guard + - remote::sambanova agents: - inline::meta-reference telemetry: @@ -17,6 +18,10 @@ distribution_spec: tool_runtime: - remote::brave-search - remote::tavily-search - - inline::code-interpreter - inline::rag-runtime + - remote::model-context-protocol + - remote::wolfram-alpha image_type: conda +additional_pip_packages: +- aiosqlite +- sqlalchemy[asyncio] diff --git a/llama_stack/templates/sambanova/doc_template.md b/llama_stack/templates/sambanova/doc_template.md index 42d9efb66..1dc76fd3f 100644 --- a/llama_stack/templates/sambanova/doc_template.md +++ b/llama_stack/templates/sambanova/doc_template.md @@ -37,33 +37,44 @@ The following models are available by default: ### Prerequisite: API Keys -Make sure you have access to a SambaNova API Key. You can get one by visiting [SambaNova.ai](https://sambanova.ai/). +Make sure you have access to a SambaNova API Key. You can get one by visiting [SambaNova.ai](http://cloud.sambanova.ai?utm_source=llamastack&utm_medium=external&utm_campaign=cloud_signup). 
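Note that the run configurations in this patch read the API key (and most other settings) through `${env.NAME:default}` placeholders, e.g. `api_key: ${env.SAMBANOVA_API_KEY}` or `db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/registry.db`. The sketch below only illustrates that convention; it is not the stack's own resolver, and the `resolve_placeholders` helper is a name introduced here for illustration.

```python
# Illustrative sketch of the ${env.NAME} / ${env.NAME:default} placeholder
# convention used throughout these run.yaml files; not Llama Stack's resolver.
import os
import re

_PLACEHOLDER = re.compile(r"\$\{env\.([A-Za-z_][A-Za-z0-9_]*)(?::([^}]*))?\}")


def resolve_placeholders(value: str) -> str:
    """Expand each placeholder from the environment, falling back to its default."""

    def _substitute(match: re.Match) -> str:
        name, default = match.group(1), match.group(2)
        resolved = os.environ.get(name, default)
        if resolved is None:
            # No default was given, so the variable is effectively required.
            raise ValueError(f"environment variable {name} is required but not set")
        return resolved

    return _PLACEHOLDER.sub(_substitute, value)


# Falls back to the default path when SQLITE_STORE_DIR is unset.
print(resolve_placeholders("${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/registry.db"))
```

Placeholders without a default (like `SAMBANOVA_API_KEY` here) behave as required settings, while those with a default fall back silently.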
## Running Llama Stack with SambaNova You can do this via Conda (build code) or Docker which has a pre-built image. -### Via Docker -This method allows you to get started quickly without having to build the distribution code. +### Via Docker ```bash LLAMA_STACK_PORT=8321 +llama stack build --template sambanova --image-type container docker run \ -it \ - --pull always \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ - llamastack/distribution-{{ name }} \ + -v ~/.llama:/root/.llama \ + distribution-{{ name }} \ --port $LLAMA_STACK_PORT \ --env SAMBANOVA_API_KEY=$SAMBANOVA_API_KEY ``` + +### Via Venv + +```bash +llama stack build --template sambanova --image-type venv +llama stack run --image-type venv ~/.llama/distributions/sambanova/sambanova-run.yaml \ + --port $LLAMA_STACK_PORT \ + --env SAMBANOVA_API_KEY=$SAMBANOVA_API_KEY +``` + + ### Via Conda ```bash llama stack build --template sambanova --image-type conda -llama stack run ./run.yaml \ +llama stack run --image-type conda ~/.llama/distributions/sambanova/sambanova-run.yaml \ --port $LLAMA_STACK_PORT \ --env SAMBANOVA_API_KEY=$SAMBANOVA_API_KEY ``` diff --git a/llama_stack/templates/sambanova/run.yaml b/llama_stack/templates/sambanova/run.yaml index e4e8e4e21..8c2a933ab 100644 --- a/llama_stack/templates/sambanova/run.yaml +++ b/llama_stack/templates/sambanova/run.yaml @@ -14,6 +14,9 @@ providers: config: url: https://api.sambanova.ai/v1 api_key: ${env.SAMBANOVA_API_KEY} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + config: {} vector_io: - provider_id: faiss provider_type: inline::faiss @@ -35,10 +38,11 @@ providers: user: ${env.PGVECTOR_USER:} password: ${env.PGVECTOR_PASSWORD:} safety: - - provider_id: llama-guard - provider_type: inline::llama-guard + - provider_id: sambanova + provider_type: remote::sambanova config: - excluded_categories: [] + url: https://api.sambanova.ai/v1 + api_key: ${env.SAMBANOVA_API_KEY} agents: - provider_id: meta-reference provider_type: inline::meta-reference @@ -47,13 +51,16 @@ providers: type: sqlite namespace: null db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/sambanova/trace_store.db} + sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/trace_store.db tool_runtime: - provider_id: brave-search provider_type: remote::brave-search @@ -65,118 +72,133 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:} max_results: 3 - - provider_id: code-interpreter - provider_type: inline::code-interpreter - config: {} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} + - provider_id: wolfram-alpha + provider_type: remote::wolfram-alpha + config: + api_key: ${env.WOLFRAM_ALPHA_API_KEY:} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/inference_store.db models: - metadata: {} - model_id: Meta-Llama-3.1-8B-Instruct + model_id: 
sambanova/Meta-Llama-3.1-8B-Instruct provider_id: sambanova - provider_model_id: Meta-Llama-3.1-8B-Instruct + provider_model_id: sambanova/Meta-Llama-3.1-8B-Instruct model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct provider_id: sambanova - provider_model_id: Meta-Llama-3.1-8B-Instruct + provider_model_id: sambanova/Meta-Llama-3.1-8B-Instruct model_type: llm - metadata: {} - model_id: Meta-Llama-3.1-70B-Instruct + model_id: sambanova/Meta-Llama-3.1-405B-Instruct provider_id: sambanova - provider_model_id: Meta-Llama-3.1-70B-Instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-70B-Instruct - provider_id: sambanova - provider_model_id: Meta-Llama-3.1-70B-Instruct - model_type: llm -- metadata: {} - model_id: Meta-Llama-3.1-405B-Instruct - provider_id: sambanova - provider_model_id: Meta-Llama-3.1-405B-Instruct + provider_model_id: sambanova/Meta-Llama-3.1-405B-Instruct model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 provider_id: sambanova - provider_model_id: Meta-Llama-3.1-405B-Instruct + provider_model_id: sambanova/Meta-Llama-3.1-405B-Instruct model_type: llm - metadata: {} - model_id: Meta-Llama-3.2-1B-Instruct + model_id: sambanova/Meta-Llama-3.2-1B-Instruct provider_id: sambanova - provider_model_id: Meta-Llama-3.2-1B-Instruct + provider_model_id: sambanova/Meta-Llama-3.2-1B-Instruct model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-1B-Instruct provider_id: sambanova - provider_model_id: Meta-Llama-3.2-1B-Instruct + provider_model_id: sambanova/Meta-Llama-3.2-1B-Instruct model_type: llm - metadata: {} - model_id: Meta-Llama-3.2-3B-Instruct + model_id: sambanova/Meta-Llama-3.2-3B-Instruct provider_id: sambanova - provider_model_id: Meta-Llama-3.2-3B-Instruct + provider_model_id: sambanova/Meta-Llama-3.2-3B-Instruct model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-3B-Instruct provider_id: sambanova - provider_model_id: Meta-Llama-3.2-3B-Instruct + provider_model_id: sambanova/Meta-Llama-3.2-3B-Instruct model_type: llm - metadata: {} - model_id: Meta-Llama-3.3-70B-Instruct + model_id: sambanova/Meta-Llama-3.3-70B-Instruct provider_id: sambanova - provider_model_id: Meta-Llama-3.3-70B-Instruct + provider_model_id: sambanova/Meta-Llama-3.3-70B-Instruct model_type: llm - metadata: {} model_id: meta-llama/Llama-3.3-70B-Instruct provider_id: sambanova - provider_model_id: Meta-Llama-3.3-70B-Instruct + provider_model_id: sambanova/Meta-Llama-3.3-70B-Instruct model_type: llm - metadata: {} - model_id: Llama-3.2-11B-Vision-Instruct + model_id: sambanova/Llama-3.2-11B-Vision-Instruct provider_id: sambanova - provider_model_id: Llama-3.2-11B-Vision-Instruct + provider_model_id: sambanova/Llama-3.2-11B-Vision-Instruct model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-11B-Vision-Instruct provider_id: sambanova - provider_model_id: Llama-3.2-11B-Vision-Instruct + provider_model_id: sambanova/Llama-3.2-11B-Vision-Instruct model_type: llm - metadata: {} - model_id: Llama-3.2-90B-Vision-Instruct + model_id: sambanova/Llama-3.2-90B-Vision-Instruct provider_id: sambanova - provider_model_id: Llama-3.2-90B-Vision-Instruct + provider_model_id: sambanova/Llama-3.2-90B-Vision-Instruct model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-90B-Vision-Instruct provider_id: sambanova - provider_model_id: Llama-3.2-90B-Vision-Instruct + provider_model_id: sambanova/Llama-3.2-90B-Vision-Instruct model_type: llm - metadata: {} - model_id: Meta-Llama-Guard-3-8B + model_id: 
sambanova/Llama-4-Scout-17B-16E-Instruct provider_id: sambanova - provider_model_id: Meta-Llama-Guard-3-8B - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-Guard-3-8B - provider_id: sambanova - provider_model_id: Meta-Llama-Guard-3-8B - model_type: llm -- metadata: {} - model_id: Llama-4-Scout-17B-16E-Instruct - provider_id: sambanova - provider_model_id: Llama-4-Scout-17B-16E-Instruct + provider_model_id: sambanova/Llama-4-Scout-17B-16E-Instruct model_type: llm - metadata: {} model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct provider_id: sambanova - provider_model_id: Llama-4-Scout-17B-16E-Instruct + provider_model_id: sambanova/Llama-4-Scout-17B-16E-Instruct model_type: llm +- metadata: {} + model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct + provider_id: sambanova + provider_model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct + provider_id: sambanova + provider_model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct + model_type: llm +- metadata: {} + model_id: sambanova/Meta-Llama-Guard-3-8B + provider_id: sambanova + provider_model_id: sambanova/Meta-Llama-Guard-3-8B + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-Guard-3-8B + provider_id: sambanova + provider_model_id: sambanova/Meta-Llama-Guard-3-8B + model_type: llm +- metadata: + embedding_dimension: 384 + model_id: all-MiniLM-L6-v2 + provider_id: sentence-transformers + model_type: embedding shields: - shield_id: meta-llama/Llama-Guard-3-8B + provider_shield_id: sambanova/Meta-Llama-Guard-3-8B +- shield_id: sambanova/Meta-Llama-Guard-3-8B + provider_shield_id: sambanova/Meta-Llama-Guard-3-8B vector_dbs: [] datasets: [] scoring_fns: [] @@ -186,7 +208,7 @@ tool_groups: provider_id: tavily-search - toolgroup_id: builtin::rag provider_id: rag-runtime -- toolgroup_id: builtin::code_interpreter - provider_id: code-interpreter +- toolgroup_id: builtin::wolfram_alpha + provider_id: wolfram-alpha server: port: 8321 diff --git a/llama_stack/templates/sambanova/sambanova.py b/llama_stack/templates/sambanova/sambanova.py index 8b91f8712..54a49423d 100644 --- a/llama_stack/templates/sambanova/sambanova.py +++ b/llama_stack/templates/sambanova/sambanova.py @@ -6,7 +6,16 @@ from pathlib import Path -from llama_stack.distribution.datatypes import Provider, ShieldInput, ToolGroupInput +from llama_stack.apis.models.models import ModelType +from llama_stack.distribution.datatypes import ( + ModelInput, + Provider, + ShieldInput, + ToolGroupInput, +) +from llama_stack.providers.inline.inference.sentence_transformers import ( + SentenceTransformersInferenceConfig, +) from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig from llama_stack.providers.remote.inference.sambanova import SambaNovaImplConfig from llama_stack.providers.remote.inference.sambanova.models import MODEL_ENTRIES @@ -23,26 +32,38 @@ from llama_stack.templates.template import ( def get_distribution_template() -> DistributionTemplate: providers = { - "inference": ["remote::sambanova"], + "inference": ["remote::sambanova", "inline::sentence-transformers"], "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"], - "safety": ["inline::llama-guard"], + "safety": ["remote::sambanova"], "agents": ["inline::meta-reference"], "telemetry": ["inline::meta-reference"], "tool_runtime": [ "remote::brave-search", "remote::tavily-search", - "inline::code-interpreter", "inline::rag-runtime", + "remote::model-context-protocol", + 
"remote::wolfram-alpha", ], } name = "sambanova" - inference_provider = Provider( provider_id=name, provider_type=f"remote::{name}", config=SambaNovaImplConfig.sample_run_config(), ) - + embedding_provider = Provider( + provider_id="sentence-transformers", + provider_type="inline::sentence-transformers", + config=SentenceTransformersInferenceConfig.sample_run_config(), + ) + embedding_model = ModelInput( + model_id="all-MiniLM-L6-v2", + provider_id="sentence-transformers", + model_type=ModelType.embedding, + metadata={ + "embedding_dimension": 384, + }, + ) vector_io_providers = [ Provider( provider_id="faiss", @@ -81,27 +102,35 @@ def get_distribution_template() -> DistributionTemplate: provider_id="rag-runtime", ), ToolGroupInput( - toolgroup_id="builtin::code_interpreter", - provider_id="code-interpreter", + toolgroup_id="builtin::wolfram_alpha", + provider_id="wolfram-alpha", ), ] return DistributionTemplate( name=name, distro_type="self_hosted", - description="Use SambaNova.AI for running LLM inference", - docker_image=None, + description="Use SambaNova for running LLM inference and safety", + container_image=None, template_path=Path(__file__).parent / "doc_template.md", providers=providers, available_models_by_provider=available_models, run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ - "inference": [inference_provider], + "inference": [inference_provider, embedding_provider], "vector_io": vector_io_providers, }, - default_models=default_models, - default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")], + default_models=default_models + [embedding_model], + default_shields=[ + ShieldInput( + shield_id="meta-llama/Llama-Guard-3-8B", provider_shield_id="sambanova/Meta-Llama-Guard-3-8B" + ), + ShieldInput( + shield_id="sambanova/Meta-Llama-Guard-3-8B", + provider_shield_id="sambanova/Meta-Llama-Guard-3-8B", + ), + ], default_tool_groups=default_tool_groups, ), }, @@ -112,7 +141,7 @@ def get_distribution_template() -> DistributionTemplate: ), "SAMBANOVA_API_KEY": ( "", - "SambaNova.AI API Key", + "SambaNova API Key", ), }, ) diff --git a/llama_stack/templates/starter/__init__.py b/llama_stack/templates/starter/__init__.py new file mode 100644 index 000000000..9c0d937ce --- /dev/null +++ b/llama_stack/templates/starter/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from .starter import get_distribution_template # noqa: F401 diff --git a/llama_stack/templates/starter/build.yaml b/llama_stack/templates/starter/build.yaml new file mode 100644 index 000000000..ec97c7d3e --- /dev/null +++ b/llama_stack/templates/starter/build.yaml @@ -0,0 +1,40 @@ +version: '2' +distribution_spec: + description: Quick start template for running Llama Stack with several popular providers + providers: + inference: + - remote::openai + - remote::fireworks + - remote::anthropic + - remote::gemini + - remote::groq + - remote::sambanova + - inline::sentence-transformers + vector_io: + - inline::sqlite-vec + - remote::chromadb + - remote::pgvector + safety: + - inline::llama-guard + agents: + - inline::meta-reference + telemetry: + - inline::meta-reference + eval: + - inline::meta-reference + datasetio: + - remote::huggingface + - inline::localfs + scoring: + - inline::basic + - inline::llm-as-judge + - inline::braintrust + tool_runtime: + - remote::brave-search + - remote::tavily-search + - inline::rag-runtime + - remote::model-context-protocol +image_type: conda +additional_pip_packages: +- aiosqlite +- sqlalchemy[asyncio] diff --git a/llama_stack/templates/dev/run.yaml b/llama_stack/templates/starter/run.yaml similarity index 62% rename from llama_stack/templates/dev/run.yaml rename to llama_stack/templates/starter/run.yaml index ea3b7252a..04425ed35 100644 --- a/llama_stack/templates/dev/run.yaml +++ b/llama_stack/templates/starter/run.yaml @@ -1,5 +1,5 @@ version: '2' -image_name: dev +image_name: starter apis: - agents - datasetio @@ -34,6 +34,11 @@ providers: config: url: https://api.groq.com api_key: ${env.GROQ_API_KEY:} + - provider_id: sambanova + provider_type: remote::sambanova + config: + url: https://api.sambanova.ai/v1 + api_key: ${env.SAMBANOVA_API_KEY:} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -41,7 +46,7 @@ providers: - provider_id: sqlite-vec provider_type: inline::sqlite-vec config: - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dev}/sqlite_vec.db + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/sqlite_vec.db - provider_id: ${env.ENABLE_CHROMADB+chromadb} provider_type: remote::chromadb config: @@ -66,14 +71,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dev}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/dev/trace_store.db} + sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -81,7 +89,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dev}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -89,14 +97,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dev}/huggingface_datasetio.db + db_path: 
${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dev}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -119,9 +127,6 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:} max_results: 3 - - provider_id: code-interpreter - provider_type: inline::code-interpreter - config: {} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} @@ -130,7 +135,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dev}/registry.db + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/inference_store.db models: - metadata: {} model_id: openai/gpt-4o @@ -147,6 +155,76 @@ models: provider_id: openai provider_model_id: openai/chatgpt-4o-latest model_type: llm +- metadata: {} + model_id: gpt-3.5-turbo-0125 + provider_id: openai + provider_model_id: gpt-3.5-turbo-0125 + model_type: llm +- metadata: {} + model_id: gpt-3.5-turbo + provider_id: openai + provider_model_id: gpt-3.5-turbo + model_type: llm +- metadata: {} + model_id: gpt-3.5-turbo-instruct + provider_id: openai + provider_model_id: gpt-3.5-turbo-instruct + model_type: llm +- metadata: {} + model_id: gpt-4 + provider_id: openai + provider_model_id: gpt-4 + model_type: llm +- metadata: {} + model_id: gpt-4-turbo + provider_id: openai + provider_model_id: gpt-4-turbo + model_type: llm +- metadata: {} + model_id: gpt-4o + provider_id: openai + provider_model_id: gpt-4o + model_type: llm +- metadata: {} + model_id: gpt-4o-2024-08-06 + provider_id: openai + provider_model_id: gpt-4o-2024-08-06 + model_type: llm +- metadata: {} + model_id: gpt-4o-mini + provider_id: openai + provider_model_id: gpt-4o-mini + model_type: llm +- metadata: {} + model_id: gpt-4o-audio-preview + provider_id: openai + provider_model_id: gpt-4o-audio-preview + model_type: llm +- metadata: {} + model_id: chatgpt-4o-latest + provider_id: openai + provider_model_id: chatgpt-4o-latest + model_type: llm +- metadata: {} + model_id: o1 + provider_id: openai + provider_model_id: o1 + model_type: llm +- metadata: {} + model_id: o1-mini + provider_id: openai + provider_model_id: o1-mini + model_type: llm +- metadata: {} + model_id: o3-mini + provider_id: openai + provider_model_id: o3-mini + model_type: llm +- metadata: {} + model_id: o4-mini + provider_id: openai + provider_model_id: o4-mini + model_type: llm - metadata: embedding_dimension: 1536 context_length: 8192 @@ -161,6 +239,20 @@ models: provider_id: openai provider_model_id: openai/text-embedding-3-large model_type: embedding +- metadata: + embedding_dimension: 1536 + context_length: 8192 + model_id: text-embedding-3-small + provider_id: openai + provider_model_id: text-embedding-3-small + model_type: embedding +- metadata: + embedding_dimension: 3072 + context_length: 8192 + model_id: text-embedding-3-large + provider_id: openai + provider_model_id: text-embedding-3-large + model_type: embedding - metadata: {} model_id: accounts/fireworks/models/llama-v3p1-8b-instruct provider_id: fireworks @@ -386,6 +478,16 @@ models: provider_id: groq provider_model_id: groq/llama-4-scout-17b-16e-instruct model_type: llm +- metadata: {} + 
model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct + provider_id: groq + provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: groq + provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct + model_type: llm - metadata: {} model_id: groq/llama-4-maverick-17b-128e-instruct provider_id: groq @@ -396,6 +498,116 @@ models: provider_id: groq provider_model_id: groq/llama-4-maverick-17b-128e-instruct model_type: llm +- metadata: {} + model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct + provider_id: groq + provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct + provider_id: groq + provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct + model_type: llm +- metadata: {} + model_id: sambanova/Meta-Llama-3.1-8B-Instruct + provider_id: sambanova + provider_model_id: sambanova/Meta-Llama-3.1-8B-Instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.1-8B-Instruct + provider_id: sambanova + provider_model_id: sambanova/Meta-Llama-3.1-8B-Instruct + model_type: llm +- metadata: {} + model_id: sambanova/Meta-Llama-3.1-405B-Instruct + provider_id: sambanova + provider_model_id: sambanova/Meta-Llama-3.1-405B-Instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 + provider_id: sambanova + provider_model_id: sambanova/Meta-Llama-3.1-405B-Instruct + model_type: llm +- metadata: {} + model_id: sambanova/Meta-Llama-3.2-1B-Instruct + provider_id: sambanova + provider_model_id: sambanova/Meta-Llama-3.2-1B-Instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-1B-Instruct + provider_id: sambanova + provider_model_id: sambanova/Meta-Llama-3.2-1B-Instruct + model_type: llm +- metadata: {} + model_id: sambanova/Meta-Llama-3.2-3B-Instruct + provider_id: sambanova + provider_model_id: sambanova/Meta-Llama-3.2-3B-Instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-3B-Instruct + provider_id: sambanova + provider_model_id: sambanova/Meta-Llama-3.2-3B-Instruct + model_type: llm +- metadata: {} + model_id: sambanova/Meta-Llama-3.3-70B-Instruct + provider_id: sambanova + provider_model_id: sambanova/Meta-Llama-3.3-70B-Instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.3-70B-Instruct + provider_id: sambanova + provider_model_id: sambanova/Meta-Llama-3.3-70B-Instruct + model_type: llm +- metadata: {} + model_id: sambanova/Llama-3.2-11B-Vision-Instruct + provider_id: sambanova + provider_model_id: sambanova/Llama-3.2-11B-Vision-Instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-11B-Vision-Instruct + provider_id: sambanova + provider_model_id: sambanova/Llama-3.2-11B-Vision-Instruct + model_type: llm +- metadata: {} + model_id: sambanova/Llama-3.2-90B-Vision-Instruct + provider_id: sambanova + provider_model_id: sambanova/Llama-3.2-90B-Vision-Instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-90B-Vision-Instruct + provider_id: sambanova + provider_model_id: sambanova/Llama-3.2-90B-Vision-Instruct + model_type: llm +- metadata: {} + model_id: sambanova/Llama-4-Scout-17B-16E-Instruct + provider_id: sambanova + provider_model_id: sambanova/Llama-4-Scout-17B-16E-Instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: sambanova + provider_model_id: 
sambanova/Llama-4-Scout-17B-16E-Instruct + model_type: llm +- metadata: {} + model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct + provider_id: sambanova + provider_model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct + provider_id: sambanova + provider_model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct + model_type: llm +- metadata: {} + model_id: sambanova/Meta-Llama-Guard-3-8B + provider_id: sambanova + provider_model_id: sambanova/Meta-Llama-Guard-3-8B + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-Guard-3-8B + provider_id: sambanova + provider_model_id: sambanova/Meta-Llama-Guard-3-8B + model_type: llm - metadata: embedding_dimension: 384 model_id: all-MiniLM-L6-v2 @@ -412,7 +624,5 @@ tool_groups: provider_id: tavily-search - toolgroup_id: builtin::rag provider_id: rag-runtime -- toolgroup_id: builtin::code_interpreter - provider_id: code-interpreter server: port: 8321 diff --git a/llama_stack/templates/dev/dev.py b/llama_stack/templates/starter/starter.py similarity index 91% rename from llama_stack/templates/dev/dev.py rename to llama_stack/templates/starter/starter.py index 69924acbe..0932bfdfe 100644 --- a/llama_stack/templates/dev/dev.py +++ b/llama_stack/templates/starter/starter.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import List, Tuple from llama_stack.apis.models.models import ModelType from llama_stack.distribution.datatypes import ( @@ -39,10 +38,15 @@ from llama_stack.providers.remote.inference.openai.config import OpenAIConfig from llama_stack.providers.remote.inference.openai.models import ( MODEL_ENTRIES as OPENAI_MODEL_ENTRIES, ) +from llama_stack.providers.remote.inference.sambanova.config import SambaNovaImplConfig +from llama_stack.providers.remote.inference.sambanova.models import ( + MODEL_ENTRIES as SAMBANOVA_MODEL_ENTRIES, +) from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig from llama_stack.providers.remote.vector_io.pgvector.config import ( PGVectorVectorIOConfig, ) +from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry from llama_stack.templates.template import ( DistributionTemplate, RunConfigSettings, @@ -50,7 +54,7 @@ from llama_stack.templates.template import ( ) -def get_inference_providers() -> Tuple[List[Provider], List[ModelInput]]: +def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderModelEntry]]]: # in this template, we allow each API key to be optional providers = [ ( @@ -78,6 +82,11 @@ def get_inference_providers() -> Tuple[List[Provider], List[ModelInput]]: GROQ_MODEL_ENTRIES, GroqConfig.sample_run_config(api_key="${env.GROQ_API_KEY:}"), ), + ( + "sambanova", + SAMBANOVA_MODEL_ENTRIES, + SambaNovaImplConfig.sample_run_config(api_key="${env.SAMBANOVA_API_KEY:}"), + ), ] inference_providers = [] available_models = {} @@ -107,12 +116,11 @@ def get_distribution_template() -> DistributionTemplate: "tool_runtime": [ "remote::brave-search", "remote::tavily-search", - "inline::code-interpreter", "inline::rag-runtime", "remote::model-context-protocol", ], } - name = "dev" + name = "starter" vector_io_providers = [ Provider( @@ -150,10 +158,6 @@ def get_distribution_template() -> DistributionTemplate: toolgroup_id="builtin::rag", provider_id="rag-runtime", ), - ToolGroupInput( - toolgroup_id="builtin::code_interpreter", - 
provider_id="code-interpreter", - ), ] embedding_model = ModelInput( model_id="all-MiniLM-L6-v2", @@ -168,7 +172,7 @@ def get_distribution_template() -> DistributionTemplate: return DistributionTemplate( name=name, distro_type="self_hosted", - description="Distribution for running e2e tests in CI", + description="Quick start template for running Llama Stack with several popular providers", container_image=None, template_path=None, providers=providers, diff --git a/llama_stack/templates/template.py b/llama_stack/templates/template.py index 92b1b534d..4013f08f9 100644 --- a/llama_stack/templates/template.py +++ b/llama_stack/templates/template.py @@ -5,7 +5,7 @@ # the root directory of this source tree. from pathlib import Path -from typing import Dict, List, Literal, Optional, Tuple +from typing import Literal import jinja2 import yaml @@ -28,12 +28,13 @@ from llama_stack.distribution.datatypes import ( from llama_stack.distribution.distribution import get_provider_registry from llama_stack.distribution.utils.dynamic import instantiate_class_type from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry -from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig +from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig +from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig def get_model_registry( - available_models: Dict[str, List[ProviderModelEntry]], -) -> List[ModelInput]: + available_models: dict[str, list[ProviderModelEntry]], +) -> list[ModelInput]: models = [] for provider_id, entries in available_models.items(): for entry in entries: @@ -57,18 +58,20 @@ class DefaultModel(BaseModel): class RunConfigSettings(BaseModel): - provider_overrides: Dict[str, List[Provider]] = Field(default_factory=dict) - default_models: Optional[List[ModelInput]] = None - default_shields: Optional[List[ShieldInput]] = None - default_tool_groups: Optional[List[ToolGroupInput]] = None - default_datasets: Optional[List[DatasetInput]] = None - default_benchmarks: Optional[List[BenchmarkInput]] = None + provider_overrides: dict[str, list[Provider]] = Field(default_factory=dict) + default_models: list[ModelInput] | None = None + default_shields: list[ShieldInput] | None = None + default_tool_groups: list[ToolGroupInput] | None = None + default_datasets: list[DatasetInput] | None = None + default_benchmarks: list[BenchmarkInput] | None = None + metadata_store: KVStoreConfig | None = None + inference_store: SqlStoreConfig | None = None def run_config( self, name: str, - providers: Dict[str, List[str]], - container_image: Optional[str] = None, + providers: dict[str, list[str]], + container_image: str | None = None, ) -> StackRunConfig: provider_registry = get_provider_registry() @@ -113,10 +116,16 @@ class RunConfigSettings(BaseModel): container_image=container_image, apis=apis, providers=provider_configs, - metadata_store=SqliteKVStoreConfig.sample_run_config( + metadata_store=self.metadata_store + or SqliteKVStoreConfig.sample_run_config( __distro_dir__=f"~/.llama/distributions/{name}", db_name="registry.db", ), + inference_store=self.inference_store + or SqliteSqlStoreConfig.sample_run_config( + __distro_dir__=f"~/.llama/distributions/{name}", + db_name="inference_store.db", + ), models=self.default_models or [], shields=self.default_shields or [], tool_groups=self.default_tool_groups or [], @@ -135,25 +144,31 @@ class DistributionTemplate(BaseModel): description: str distro_type: 
Literal["self_hosted", "remote_hosted", "ondevice"] - providers: Dict[str, List[str]] - run_configs: Dict[str, RunConfigSettings] - template_path: Optional[Path] = None + providers: dict[str, list[str]] + run_configs: dict[str, RunConfigSettings] + template_path: Path | None = None # Optional configuration - run_config_env_vars: Optional[Dict[str, Tuple[str, str]]] = None - container_image: Optional[str] = None + run_config_env_vars: dict[str, tuple[str, str]] | None = None + container_image: str | None = None - available_models_by_provider: Optional[Dict[str, List[ProviderModelEntry]]] = None + available_models_by_provider: dict[str, list[ProviderModelEntry]] | None = None def build_config(self) -> BuildConfig: + additional_pip_packages: list[str] = [] + for run_config in self.run_configs.values(): + run_config_ = run_config.run_config(self.name, self.providers, self.container_image) + if run_config_.inference_store: + additional_pip_packages.extend(run_config_.inference_store.pip_packages) + return BuildConfig( - name=self.name, distribution_spec=DistributionSpec( description=self.description, container_image=self.container_image, providers=self.providers, ), image_type="conda", # default to conda, can be overridden + additional_pip_packages=sorted(set(additional_pip_packages)), ) def generate_markdown_docs(self) -> str: diff --git a/llama_stack/templates/tgi/build.yaml b/llama_stack/templates/tgi/build.yaml index 9fe79647c..361b0b680 100644 --- a/llama_stack/templates/tgi/build.yaml +++ b/llama_stack/templates/tgi/build.yaml @@ -27,7 +27,9 @@ distribution_spec: tool_runtime: - remote::brave-search - remote::tavily-search - - inline::code-interpreter - inline::rag-runtime - remote::model-context-protocol image_type: conda +additional_pip_packages: +- aiosqlite +- sqlalchemy[asyncio] diff --git a/llama_stack/templates/tgi/doc_template.md b/llama_stack/templates/tgi/doc_template.md index b69ccaa56..68b475893 100644 --- a/llama_stack/templates/tgi/doc_template.md +++ b/llama_stack/templates/tgi/doc_template.md @@ -105,7 +105,7 @@ docker run \ -v ~/.llama:/root/.llama \ -v ./llama_stack/templates/tgi/run-with-safety.yaml:/root/my-run.yaml \ llamastack/distribution-{{ name }} \ - --yaml-config /root/my-run.yaml \ + --config /root/my-run.yaml \ --port $LLAMA_STACK_PORT \ --env INFERENCE_MODEL=$INFERENCE_MODEL \ --env TGI_URL=http://host.docker.internal:$INFERENCE_PORT \ diff --git a/llama_stack/templates/tgi/report.md b/llama_stack/templates/tgi/report.md deleted file mode 100644 index b0f5d88a2..000000000 --- a/llama_stack/templates/tgi/report.md +++ /dev/null @@ -1,44 +0,0 @@ -# Report for tgi distribution - -## Supported Models -| Model Descriptor | tgi | -|:---|:---| -| Llama-3-8B-Instruct | ✅ | -| Llama-3-70B-Instruct | ✅ | -| Llama3.1-8B-Instruct | ✅ | -| Llama3.1-70B-Instruct | ✅ | -| Llama3.1-405B-Instruct | ✅ | -| Llama3.2-1B-Instruct | ✅ | -| Llama3.2-3B-Instruct | ✅ | -| Llama3.2-11B-Vision-Instruct | ✅ | -| Llama3.2-90B-Vision-Instruct | ✅ | -| Llama3.3-70B-Instruct | ✅ | -| Llama-Guard-3-11B-Vision | ✅ | -| Llama-Guard-3-1B | ✅ | -| Llama-Guard-3-8B | ✅ | -| Llama-Guard-2-8B | ✅ | - -## Inference -| Model | API | Capability | Test | Status | -|:----- |:-----|:-----|:-----|:-----| -| Llama-3.1-8B-Instruct | /chat_completion | streaming | test_text_chat_completion_streaming | ✅ | -| Llama-3.2-11B-Vision-Instruct | /chat_completion | streaming | test_image_chat_completion_streaming | ❌ | -| Llama-3.2-11B-Vision-Instruct | /chat_completion | non_streaming | 
test_image_chat_completion_non_streaming | ❌ | -| Llama-3.1-8B-Instruct | /chat_completion | non_streaming | test_text_chat_completion_non_streaming | ✅ | -| Llama-3.1-8B-Instruct | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_streaming | ✅ | -| Llama-3.1-8B-Instruct | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_non_streaming | ✅ | -| Llama-3.1-8B-Instruct | /completion | streaming | test_text_completion_streaming | ✅ | -| Llama-3.1-8B-Instruct | /completion | non_streaming | test_text_completion_non_streaming | ✅ | -| Llama-3.1-8B-Instruct | /completion | structured_output | test_text_completion_structured_output | ✅ | - -## Vector IO -| API | Capability | Test | Status | -|:-----|:-----|:-----|:-----| -| /retrieve | | test_vector_db_retrieve | ✅ | - -## Agents -| API | Capability | Test | Status | -|:-----|:-----|:-----|:-----| -| /create_agent_turn | rag | test_rag_agent | ✅ | -| /create_agent_turn | custom_tool | test_custom_tool | ✅ | -| /create_agent_turn | code_execution | test_code_interpreter_for_attachments | ✅ | diff --git a/llama_stack/templates/tgi/run-with-safety.yaml b/llama_stack/templates/tgi/run-with-safety.yaml index 12d6bd284..c797b93aa 100644 --- a/llama_stack/templates/tgi/run-with-safety.yaml +++ b/llama_stack/templates/tgi/run-with-safety.yaml @@ -41,13 +41,16 @@ providers: type: sqlite namespace: null db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/tgi/trace_store.db} + sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -93,9 +96,6 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:} max_results: 3 - - provider_id: code-interpreter - provider_type: inline::code-interpreter - config: {} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} @@ -105,6 +105,9 @@ providers: metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} @@ -125,7 +128,5 @@ tool_groups: provider_id: tavily-search - toolgroup_id: builtin::rag provider_id: rag-runtime -- toolgroup_id: builtin::code_interpreter - provider_id: code-interpreter server: port: 8321 diff --git a/llama_stack/templates/tgi/run.yaml b/llama_stack/templates/tgi/run.yaml index 9f05c7584..7e91d20bd 100644 --- a/llama_stack/templates/tgi/run.yaml +++ b/llama_stack/templates/tgi/run.yaml @@ -40,13 +40,16 @@ providers: type: sqlite namespace: null db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: 
${env.SQLITE_DB_PATH:~/.llama/distributions/tgi/trace_store.db} + sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -92,9 +95,6 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:} max_results: 3 - - provider_id: code-interpreter - provider_type: inline::code-interpreter - config: {} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} @@ -104,6 +104,9 @@ providers: metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} @@ -124,7 +127,5 @@ tool_groups: provider_id: tavily-search - toolgroup_id: builtin::rag provider_id: rag-runtime -- toolgroup_id: builtin::code_interpreter - provider_id: code-interpreter server: port: 8321 diff --git a/llama_stack/templates/tgi/tgi.py b/llama_stack/templates/tgi/tgi.py index 22dcc3995..2c97cbf80 100644 --- a/llama_stack/templates/tgi/tgi.py +++ b/llama_stack/templates/tgi/tgi.py @@ -34,7 +34,6 @@ def get_distribution_template() -> DistributionTemplate: "tool_runtime": [ "remote::brave-search", "remote::tavily-search", - "inline::code-interpreter", "inline::rag-runtime", "remote::model-context-protocol", ], @@ -83,10 +82,6 @@ def get_distribution_template() -> DistributionTemplate: toolgroup_id="builtin::rag", provider_id="rag-runtime", ), - ToolGroupInput( - toolgroup_id="builtin::code_interpreter", - provider_id="code-interpreter", - ), ] return DistributionTemplate( diff --git a/llama_stack/templates/together/build.yaml b/llama_stack/templates/together/build.yaml index 834a3ecaf..5ffeac873 100644 --- a/llama_stack/templates/together/build.yaml +++ b/llama_stack/templates/together/build.yaml @@ -27,8 +27,10 @@ distribution_spec: tool_runtime: - remote::brave-search - remote::tavily-search - - inline::code-interpreter - inline::rag-runtime - remote::model-context-protocol - remote::wolfram-alpha image_type: conda +additional_pip_packages: +- aiosqlite +- sqlalchemy[asyncio] diff --git a/llama_stack/templates/together/report.md b/llama_stack/templates/together/report.md deleted file mode 100644 index e125d5665..000000000 --- a/llama_stack/templates/together/report.md +++ /dev/null @@ -1,46 +0,0 @@ -# Report for together distribution - -## Supported Models -| Model Descriptor | together | -|:---|:---| -| Llama-3-8B-Instruct | ❌ | -| Llama-3-70B-Instruct | ❌ | -| Llama3.1-8B-Instruct | ✅ | -| Llama3.1-70B-Instruct | ✅ | -| Llama3.1-405B-Instruct | ✅ | -| Llama3.2-1B-Instruct | ❌ | -| Llama3.2-3B-Instruct | ✅ | -| Llama3.2-11B-Vision-Instruct | ✅ | -| Llama3.2-90B-Vision-Instruct | ✅ | -| Llama3.3-70B-Instruct | ✅ | -| Llama-Guard-3-11B-Vision | ✅ | -| Llama-Guard-3-1B | ❌ | -| Llama-Guard-3-8B | ✅ | -| Llama-Guard-2-8B | ❌ | - -## Inference -| Model | API | Capability | Test | Status | -|:----- |:-----|:-----|:-----|:-----| -| Llama-3.1-8B-Instruct | /chat_completion | streaming | test_text_chat_completion_streaming | ✅ | -| Llama-3.2-11B-Vision-Instruct | /chat_completion | streaming | test_image_chat_completion_streaming | ✅ | -| Llama-3.2-11B-Vision-Instruct | /chat_completion | non_streaming | test_image_chat_completion_non_streaming | ✅ | -| Llama-3.1-8B-Instruct | /chat_completion | non_streaming | test_text_chat_completion_non_streaming | ✅ | -| Llama-3.1-8B-Instruct | /chat_completion | tool_calling | 
test_text_chat_completion_with_tool_calling_and_streaming | ✅ | -| Llama-3.1-8B-Instruct | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_non_streaming | ✅ | -| Llama-3.2-11B-Vision-Instruct | /chat_completion | log_probs | test_completion_log_probs_non_streaming | ✅ | -| Llama-3.2-11B-Vision-Instruct | /chat_completion | log_probs | test_completion_log_probs_streaming | ✅ | -| Llama-3.1-8B-Instruct | /completion | streaming | test_text_completion_streaming | ✅ | -| Llama-3.1-8B-Instruct | /completion | non_streaming | test_text_completion_non_streaming | ✅ | -| Llama-3.1-8B-Instruct | /completion | structured_output | test_text_completion_structured_output | ✅ | - -## Vector IO -| Provider | API | Capability | Test | Status | -|:-----|:-----|:-----|:-----|:-----| -| inline::faiss | /retrieve | | test_vector_db_retrieve | ✅ | - -## Agents -| Provider | API | Capability | Test | Status | -|:-----|:-----|:-----|:-----|:-----| -| inline::meta-reference | /create_agent_turn | rag | test_rag_agent | ✅ | -| inline::meta-reference | /create_agent_turn | custom_tool | test_custom_tool | ✅ | -| inline::meta-reference | /create_agent_turn | code_execution | test_code_interpreter_for_attachments | ✅ | diff --git a/llama_stack/templates/together/run-with-safety.yaml b/llama_stack/templates/together/run-with-safety.yaml index 105ce896d..190a0400b 100644 --- a/llama_stack/templates/together/run-with-safety.yaml +++ b/llama_stack/templates/together/run-with-safety.yaml @@ -46,13 +46,16 @@ providers: type: sqlite namespace: null db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/together/trace_store.db} + sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -98,9 +101,6 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:} max_results: 3 - - provider_id: code-interpreter - provider_type: inline::code-interpreter - config: {} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} @@ -114,6 +114,9 @@ providers: metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/inference_store.db models: - metadata: {} model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo @@ -270,8 +273,6 @@ tool_groups: provider_id: tavily-search - toolgroup_id: builtin::rag provider_id: rag-runtime -- toolgroup_id: builtin::code_interpreter - provider_id: code-interpreter - toolgroup_id: builtin::wolfram_alpha provider_id: wolfram-alpha server: diff --git a/llama_stack/templates/together/run.yaml b/llama_stack/templates/together/run.yaml index 1f1613655..ce9542130 100644 --- a/llama_stack/templates/together/run.yaml +++ b/llama_stack/templates/together/run.yaml @@ -41,13 +41,16 @@ providers: type: sqlite namespace: null db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/agents_store.db + responses_store: + type: sqlite + db_path: 
${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/together/trace_store.db} + sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -93,9 +96,6 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:} max_results: 3 - - provider_id: code-interpreter - provider_type: inline::code-interpreter - config: {} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} @@ -109,6 +109,9 @@ providers: metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/inference_store.db models: - metadata: {} model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo @@ -260,8 +263,6 @@ tool_groups: provider_id: tavily-search - toolgroup_id: builtin::rag provider_id: rag-runtime -- toolgroup_id: builtin::code_interpreter - provider_id: code-interpreter - toolgroup_id: builtin::wolfram_alpha provider_id: wolfram-alpha server: diff --git a/llama_stack/templates/together/together.py b/llama_stack/templates/together/together.py index a2bd87c97..7761bd9fd 100644 --- a/llama_stack/templates/together/together.py +++ b/llama_stack/templates/together/together.py @@ -39,7 +39,6 @@ def get_distribution_template() -> DistributionTemplate: "tool_runtime": [ "remote::brave-search", "remote::tavily-search", - "inline::code-interpreter", "inline::rag-runtime", "remote::model-context-protocol", "remote::wolfram-alpha", @@ -74,10 +73,6 @@ def get_distribution_template() -> DistributionTemplate: toolgroup_id="builtin::rag", provider_id="rag-runtime", ), - ToolGroupInput( - toolgroup_id="builtin::code_interpreter", - provider_id="code-interpreter", - ), ToolGroupInput( toolgroup_id="builtin::wolfram_alpha", provider_id="wolfram-alpha", diff --git a/llama_stack/templates/verification/build.yaml b/llama_stack/templates/verification/build.yaml index 9f010d651..ce083dbba 100644 --- a/llama_stack/templates/verification/build.yaml +++ b/llama_stack/templates/verification/build.yaml @@ -32,7 +32,9 @@ distribution_spec: tool_runtime: - remote::brave-search - remote::tavily-search - - inline::code-interpreter - inline::rag-runtime - remote::model-context-protocol image_type: conda +additional_pip_packages: +- aiosqlite +- sqlalchemy[asyncio] diff --git a/llama_stack/templates/verification/run.yaml b/llama_stack/templates/verification/run.yaml index b6c2ca98d..58b3c576c 100644 --- a/llama_stack/templates/verification/run.yaml +++ b/llama_stack/templates/verification/run.yaml @@ -74,13 +74,16 @@ providers: type: sqlite namespace: null db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/verification/trace_store.db} + sqlite_db_path: 
${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -126,9 +129,6 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:} max_results: 3 - - provider_id: code-interpreter - provider_type: inline::code-interpreter - config: {} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} @@ -138,6 +138,9 @@ providers: metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/inference_store.db models: - metadata: {} model_id: openai/gpt-4o @@ -154,6 +157,76 @@ models: provider_id: openai provider_model_id: openai/chatgpt-4o-latest model_type: llm +- metadata: {} + model_id: gpt-3.5-turbo-0125 + provider_id: openai + provider_model_id: gpt-3.5-turbo-0125 + model_type: llm +- metadata: {} + model_id: gpt-3.5-turbo + provider_id: openai + provider_model_id: gpt-3.5-turbo + model_type: llm +- metadata: {} + model_id: gpt-3.5-turbo-instruct + provider_id: openai + provider_model_id: gpt-3.5-turbo-instruct + model_type: llm +- metadata: {} + model_id: gpt-4 + provider_id: openai + provider_model_id: gpt-4 + model_type: llm +- metadata: {} + model_id: gpt-4-turbo + provider_id: openai + provider_model_id: gpt-4-turbo + model_type: llm +- metadata: {} + model_id: gpt-4o + provider_id: openai + provider_model_id: gpt-4o + model_type: llm +- metadata: {} + model_id: gpt-4o-2024-08-06 + provider_id: openai + provider_model_id: gpt-4o-2024-08-06 + model_type: llm +- metadata: {} + model_id: gpt-4o-mini + provider_id: openai + provider_model_id: gpt-4o-mini + model_type: llm +- metadata: {} + model_id: gpt-4o-audio-preview + provider_id: openai + provider_model_id: gpt-4o-audio-preview + model_type: llm +- metadata: {} + model_id: chatgpt-4o-latest + provider_id: openai + provider_model_id: chatgpt-4o-latest + model_type: llm +- metadata: {} + model_id: o1 + provider_id: openai + provider_model_id: o1 + model_type: llm +- metadata: {} + model_id: o1-mini + provider_id: openai + provider_model_id: o1-mini + model_type: llm +- metadata: {} + model_id: o3-mini + provider_id: openai + provider_model_id: o3-mini + model_type: llm +- metadata: {} + model_id: o4-mini + provider_id: openai + provider_model_id: o4-mini + model_type: llm - metadata: embedding_dimension: 1536 context_length: 8192 @@ -168,6 +241,20 @@ models: provider_id: openai provider_model_id: openai/text-embedding-3-large model_type: embedding +- metadata: + embedding_dimension: 1536 + context_length: 8192 + model_id: text-embedding-3-small + provider_id: openai + provider_model_id: text-embedding-3-small + model_type: embedding +- metadata: + embedding_dimension: 3072 + context_length: 8192 + model_id: text-embedding-3-large + provider_id: openai + provider_model_id: text-embedding-3-large + model_type: embedding - metadata: {} model_id: accounts/fireworks/models/llama-v3p1-8b-instruct provider_id: fireworks-openai-compat @@ -474,6 +561,16 @@ models: provider_id: groq-openai-compat provider_model_id: groq/llama-4-scout-17b-16e-instruct model_type: llm +- metadata: {} + model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct + provider_id: groq-openai-compat + provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: groq-openai-compat + provider_model_id: 
groq/meta-llama/llama-4-scout-17b-16e-instruct + model_type: llm - metadata: {} model_id: groq/llama-4-maverick-17b-128e-instruct provider_id: groq-openai-compat @@ -485,104 +582,114 @@ models: provider_model_id: groq/llama-4-maverick-17b-128e-instruct model_type: llm - metadata: {} - model_id: Meta-Llama-3.1-8B-Instruct + model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct + provider_id: groq-openai-compat + provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct + provider_id: groq-openai-compat + provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct + model_type: llm +- metadata: {} + model_id: sambanova/Meta-Llama-3.1-8B-Instruct provider_id: sambanova-openai-compat - provider_model_id: Meta-Llama-3.1-8B-Instruct + provider_model_id: sambanova/Meta-Llama-3.1-8B-Instruct model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct provider_id: sambanova-openai-compat - provider_model_id: Meta-Llama-3.1-8B-Instruct + provider_model_id: sambanova/Meta-Llama-3.1-8B-Instruct model_type: llm - metadata: {} - model_id: Meta-Llama-3.1-70B-Instruct + model_id: sambanova/Meta-Llama-3.1-405B-Instruct provider_id: sambanova-openai-compat - provider_model_id: Meta-Llama-3.1-70B-Instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-70B-Instruct - provider_id: sambanova-openai-compat - provider_model_id: Meta-Llama-3.1-70B-Instruct - model_type: llm -- metadata: {} - model_id: Meta-Llama-3.1-405B-Instruct - provider_id: sambanova-openai-compat - provider_model_id: Meta-Llama-3.1-405B-Instruct + provider_model_id: sambanova/Meta-Llama-3.1-405B-Instruct model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 provider_id: sambanova-openai-compat - provider_model_id: Meta-Llama-3.1-405B-Instruct + provider_model_id: sambanova/Meta-Llama-3.1-405B-Instruct model_type: llm - metadata: {} - model_id: Meta-Llama-3.2-1B-Instruct + model_id: sambanova/Meta-Llama-3.2-1B-Instruct provider_id: sambanova-openai-compat - provider_model_id: Meta-Llama-3.2-1B-Instruct + provider_model_id: sambanova/Meta-Llama-3.2-1B-Instruct model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-1B-Instruct provider_id: sambanova-openai-compat - provider_model_id: Meta-Llama-3.2-1B-Instruct + provider_model_id: sambanova/Meta-Llama-3.2-1B-Instruct model_type: llm - metadata: {} - model_id: Meta-Llama-3.2-3B-Instruct + model_id: sambanova/Meta-Llama-3.2-3B-Instruct provider_id: sambanova-openai-compat - provider_model_id: Meta-Llama-3.2-3B-Instruct + provider_model_id: sambanova/Meta-Llama-3.2-3B-Instruct model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-3B-Instruct provider_id: sambanova-openai-compat - provider_model_id: Meta-Llama-3.2-3B-Instruct + provider_model_id: sambanova/Meta-Llama-3.2-3B-Instruct model_type: llm - metadata: {} - model_id: Meta-Llama-3.3-70B-Instruct + model_id: sambanova/Meta-Llama-3.3-70B-Instruct provider_id: sambanova-openai-compat - provider_model_id: Meta-Llama-3.3-70B-Instruct + provider_model_id: sambanova/Meta-Llama-3.3-70B-Instruct model_type: llm - metadata: {} model_id: meta-llama/Llama-3.3-70B-Instruct provider_id: sambanova-openai-compat - provider_model_id: Meta-Llama-3.3-70B-Instruct + provider_model_id: sambanova/Meta-Llama-3.3-70B-Instruct model_type: llm - metadata: {} - model_id: Llama-3.2-11B-Vision-Instruct + model_id: sambanova/Llama-3.2-11B-Vision-Instruct provider_id: 
sambanova-openai-compat - provider_model_id: Llama-3.2-11B-Vision-Instruct + provider_model_id: sambanova/Llama-3.2-11B-Vision-Instruct model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-11B-Vision-Instruct provider_id: sambanova-openai-compat - provider_model_id: Llama-3.2-11B-Vision-Instruct + provider_model_id: sambanova/Llama-3.2-11B-Vision-Instruct model_type: llm - metadata: {} - model_id: Llama-3.2-90B-Vision-Instruct + model_id: sambanova/Llama-3.2-90B-Vision-Instruct provider_id: sambanova-openai-compat - provider_model_id: Llama-3.2-90B-Vision-Instruct + provider_model_id: sambanova/Llama-3.2-90B-Vision-Instruct model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-90B-Vision-Instruct provider_id: sambanova-openai-compat - provider_model_id: Llama-3.2-90B-Vision-Instruct + provider_model_id: sambanova/Llama-3.2-90B-Vision-Instruct model_type: llm - metadata: {} - model_id: Meta-Llama-Guard-3-8B + model_id: sambanova/Llama-4-Scout-17B-16E-Instruct provider_id: sambanova-openai-compat - provider_model_id: Meta-Llama-Guard-3-8B - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-Guard-3-8B - provider_id: sambanova-openai-compat - provider_model_id: Meta-Llama-Guard-3-8B - model_type: llm -- metadata: {} - model_id: Llama-4-Scout-17B-16E-Instruct - provider_id: sambanova-openai-compat - provider_model_id: Llama-4-Scout-17B-16E-Instruct + provider_model_id: sambanova/Llama-4-Scout-17B-16E-Instruct model_type: llm - metadata: {} model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct provider_id: sambanova-openai-compat - provider_model_id: Llama-4-Scout-17B-16E-Instruct + provider_model_id: sambanova/Llama-4-Scout-17B-16E-Instruct + model_type: llm +- metadata: {} + model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct + provider_id: sambanova-openai-compat + provider_model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct + provider_id: sambanova-openai-compat + provider_model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct + model_type: llm +- metadata: {} + model_id: sambanova/Meta-Llama-Guard-3-8B + provider_id: sambanova-openai-compat + provider_model_id: sambanova/Meta-Llama-Guard-3-8B + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-Guard-3-8B + provider_id: sambanova-openai-compat + provider_model_id: sambanova/Meta-Llama-Guard-3-8B model_type: llm - metadata: {} model_id: llama3.1-8b @@ -620,7 +727,5 @@ tool_groups: provider_id: tavily-search - toolgroup_id: builtin::rag provider_id: rag-runtime -- toolgroup_id: builtin::code_interpreter - provider_id: code-interpreter server: port: 8321 diff --git a/llama_stack/templates/verification/verification.py b/llama_stack/templates/verification/verification.py index e6f74aad8..b58400f26 100644 --- a/llama_stack/templates/verification/verification.py +++ b/llama_stack/templates/verification/verification.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from typing import Dict, List, Tuple from llama_stack.apis.models.models import ModelType from llama_stack.distribution.datatypes import ( @@ -51,7 +50,7 @@ from llama_stack.templates.template import ( ) -def get_inference_providers() -> Tuple[List[Provider], Dict[str, List[ProviderModelEntry]]]: +def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderModelEntry]]]: # in this template, we allow each API key to be optional providers = [ ( @@ -113,7 +112,6 @@ def get_distribution_template() -> DistributionTemplate: "tool_runtime": [ "remote::brave-search", "remote::tavily-search", - "inline::code-interpreter", "inline::rag-runtime", "remote::model-context-protocol", ], @@ -156,10 +154,6 @@ def get_distribution_template() -> DistributionTemplate: toolgroup_id="builtin::rag", provider_id="rag-runtime", ), - ToolGroupInput( - toolgroup_id="builtin::code_interpreter", - provider_id="code-interpreter", - ), ] embedding_model = ModelInput( model_id="all-MiniLM-L6-v2", diff --git a/llama_stack/templates/vllm-gpu/build.yaml b/llama_stack/templates/vllm-gpu/build.yaml index 8eb44dc1b..d5ff0f1f4 100644 --- a/llama_stack/templates/vllm-gpu/build.yaml +++ b/llama_stack/templates/vllm-gpu/build.yaml @@ -27,7 +27,9 @@ distribution_spec: tool_runtime: - remote::brave-search - remote::tavily-search - - inline::code-interpreter - inline::rag-runtime - remote::model-context-protocol image_type: conda +additional_pip_packages: +- aiosqlite +- sqlalchemy[asyncio] diff --git a/llama_stack/templates/vllm-gpu/run.yaml b/llama_stack/templates/vllm-gpu/run.yaml index a839aa2c5..6937e2bac 100644 --- a/llama_stack/templates/vllm-gpu/run.yaml +++ b/llama_stack/templates/vllm-gpu/run.yaml @@ -45,13 +45,16 @@ providers: type: sqlite namespace: null db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/vllm-gpu/trace_store.db} + sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -97,9 +100,6 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:} max_results: 3 - - provider_id: code-interpreter - provider_type: inline::code-interpreter - config: {} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} @@ -109,6 +109,9 @@ providers: metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} @@ -129,7 +132,5 @@ tool_groups: provider_id: tavily-search - toolgroup_id: builtin::rag provider_id: rag-runtime -- toolgroup_id: builtin::code_interpreter - provider_id: code-interpreter server: port: 8321 diff --git a/llama_stack/templates/vllm-gpu/vllm.py b/llama_stack/templates/vllm-gpu/vllm.py index 9bfeadc8d..5775138b1 100644 --- a/llama_stack/templates/vllm-gpu/vllm.py +++ b/llama_stack/templates/vllm-gpu/vllm.py @@ -31,7 +31,6 @@ def get_distribution_template() -> DistributionTemplate: "tool_runtime": [ "remote::brave-search", 
"remote::tavily-search", - "inline::code-interpreter", "inline::rag-runtime", "remote::model-context-protocol", ], @@ -75,10 +74,6 @@ def get_distribution_template() -> DistributionTemplate: toolgroup_id="builtin::rag", provider_id="rag-runtime", ), - ToolGroupInput( - toolgroup_id="builtin::code_interpreter", - provider_id="code-interpreter", - ), ] return DistributionTemplate( diff --git a/llama_stack/templates/watsonx/__init__.py b/llama_stack/templates/watsonx/__init__.py new file mode 100644 index 000000000..078d86144 --- /dev/null +++ b/llama_stack/templates/watsonx/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from .watsonx import get_distribution_template # noqa: F401 diff --git a/llama_stack/templates/watsonx/build.yaml b/llama_stack/templates/watsonx/build.yaml new file mode 100644 index 000000000..e68ace183 --- /dev/null +++ b/llama_stack/templates/watsonx/build.yaml @@ -0,0 +1,33 @@ +version: '2' +distribution_spec: + description: Use watsonx for running LLM inference + providers: + inference: + - remote::watsonx + - inline::sentence-transformers + vector_io: + - inline::faiss + safety: + - inline::llama-guard + agents: + - inline::meta-reference + telemetry: + - inline::meta-reference + eval: + - inline::meta-reference + datasetio: + - remote::huggingface + - inline::localfs + scoring: + - inline::basic + - inline::llm-as-judge + - inline::braintrust + tool_runtime: + - remote::brave-search + - remote::tavily-search + - inline::rag-runtime + - remote::model-context-protocol +image_type: conda +additional_pip_packages: +- aiosqlite +- sqlalchemy[asyncio] diff --git a/llama_stack/templates/watsonx/doc_template.md b/llama_stack/templates/watsonx/doc_template.md new file mode 100644 index 000000000..f28dbf0bf --- /dev/null +++ b/llama_stack/templates/watsonx/doc_template.md @@ -0,0 +1,74 @@ +--- +orphan: true +--- +# watsonx Distribution + +```{toctree} +:maxdepth: 2 +:hidden: + +self +``` + +The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations. + +{{ providers_table }} + +{% if run_config_env_vars %} + +### Environment Variables + +The following environment variables can be configured: + +{% for var, (default_value, description) in run_config_env_vars.items() %} +- `{{ var }}`: {{ description }} (default: `{{ default_value }}`) +{% endfor %} +{% endif %} + +{% if default_models %} +### Models + +The following models are available by default: + +{% for model in default_models %} +- `{{ model.model_id }} {{ model.doc_string }}` +{% endfor %} +{% endif %} + + +### Prerequisite: API Keys + +Make sure you have access to a watsonx API Key. You can get one by referring [watsonx.ai](https://www.ibm.com/docs/en/masv-and-l/maximo-manage/continuous-delivery?topic=setup-create-watsonx-api-key). + + +## Running Llama Stack with watsonx + +You can do this via Conda (build code), venv or Docker which has a pre-built image. + +### Via Docker + +This method allows you to get started quickly without having to build the distribution code. 
+ +```bash +LLAMA_STACK_PORT=5001 +docker run \ + -it \ + -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ + -v ./run.yaml:/root/my-run.yaml \ + llamastack/distribution-{{ name }} \ + --config /root/my-run.yaml \ + --port $LLAMA_STACK_PORT \ + --env WATSONX_API_KEY=$WATSONX_API_KEY \ + --env WATSONX_PROJECT_ID=$WATSONX_PROJECT_ID \ + --env WATSONX_BASE_URL=$WATSONX_BASE_URL +``` + +### Via Conda + +```bash +llama stack build --template watsonx --image-type conda +llama stack run ./run.yaml \ + --port $LLAMA_STACK_PORT \ + --env WATSONX_API_KEY=$WATSONX_API_KEY \ + --env WATSONX_PROJECT_ID=$WATSONX_PROJECT_ID +``` diff --git a/llama_stack/templates/watsonx/run.yaml b/llama_stack/templates/watsonx/run.yaml new file mode 100644 index 000000000..e7222fd57 --- /dev/null +++ b/llama_stack/templates/watsonx/run.yaml @@ -0,0 +1,219 @@ +version: '2' +image_name: watsonx +apis: +- agents +- datasetio +- eval +- inference +- safety +- scoring +- telemetry +- tool_runtime +- vector_io +providers: + inference: + - provider_id: watsonx + provider_type: remote::watsonx + config: + url: ${env.WATSONX_BASE_URL:https://us-south.ml.cloud.ibm.com} + api_key: ${env.WATSONX_API_KEY:} + project_id: ${env.WATSONX_PROJECT_ID:} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + config: {} + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/faiss_store.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/responses_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + service_name: ${env.OTEL_SERVICE_NAME:} + sinks: ${env.TELEMETRY_SINKS:console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/trace_store.db + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/meta_reference_eval.db + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/huggingface_datasetio.db + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/localfs_datasetio.db + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:} + max_results: 3 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} + - provider_id: 
model-context-protocol + provider_type: remote::model-context-protocol + config: {} +metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/inference_store.db +models: +- metadata: {} + model_id: meta-llama/llama-3-3-70b-instruct + provider_id: watsonx + provider_model_id: meta-llama/llama-3-3-70b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.3-70B-Instruct + provider_id: watsonx + provider_model_id: meta-llama/llama-3-3-70b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/llama-2-13b-chat + provider_id: watsonx + provider_model_id: meta-llama/llama-2-13b-chat + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-2-13b + provider_id: watsonx + provider_model_id: meta-llama/llama-2-13b-chat + model_type: llm +- metadata: {} + model_id: meta-llama/llama-3-1-70b-instruct + provider_id: watsonx + provider_model_id: meta-llama/llama-3-1-70b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.1-70B-Instruct + provider_id: watsonx + provider_model_id: meta-llama/llama-3-1-70b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/llama-3-1-8b-instruct + provider_id: watsonx + provider_model_id: meta-llama/llama-3-1-8b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.1-8B-Instruct + provider_id: watsonx + provider_model_id: meta-llama/llama-3-1-8b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/llama-3-2-11b-vision-instruct + provider_id: watsonx + provider_model_id: meta-llama/llama-3-2-11b-vision-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-11B-Vision-Instruct + provider_id: watsonx + provider_model_id: meta-llama/llama-3-2-11b-vision-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/llama-3-2-1b-instruct + provider_id: watsonx + provider_model_id: meta-llama/llama-3-2-1b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-1B-Instruct + provider_id: watsonx + provider_model_id: meta-llama/llama-3-2-1b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/llama-3-2-3b-instruct + provider_id: watsonx + provider_model_id: meta-llama/llama-3-2-3b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-3B-Instruct + provider_id: watsonx + provider_model_id: meta-llama/llama-3-2-3b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/llama-3-2-90b-vision-instruct + provider_id: watsonx + provider_model_id: meta-llama/llama-3-2-90b-vision-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-90B-Vision-Instruct + provider_id: watsonx + provider_model_id: meta-llama/llama-3-2-90b-vision-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/llama-guard-3-11b-vision + provider_id: watsonx + provider_model_id: meta-llama/llama-guard-3-11b-vision + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-Guard-3-11B-Vision + provider_id: watsonx + provider_model_id: meta-llama/llama-guard-3-11b-vision + model_type: llm +- metadata: + embedding_dimension: 384 + model_id: all-MiniLM-L6-v2 + provider_id: sentence-transformers + model_type: embedding +shields: [] +vector_dbs: [] +datasets: [] +scoring_fns: [] +benchmarks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: tavily-search +- toolgroup_id: builtin::rag + provider_id: rag-runtime +server: + port: 8321 
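The `metadata_store` and `inference_store` blocks in the watsonx run.yaml above are filled in by the template machinery: `RunConfigSettings` in `llama_stack/templates/template.py` (changed earlier in this diff) now carries optional `metadata_store` and `inference_store` fields and falls back to per-distribution SQLite files when they are left unset. Below is a minimal sketch of how a template could pin them explicitly; the field names and `sample_run_config()` calls are taken from this diff, but wiring them up from a concrete template this way is an assumption for illustration.

```python
# Sketch only: pinning the registry and inference stores for a template's run config.
# These mirror the SQLite defaults that run_config() would otherwise generate
# (registry.db and inference_store.db under ~/.llama/distributions/<name>).
from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig
from llama_stack.templates.template import RunConfigSettings

run_settings = RunConfigSettings(
    metadata_store=SqliteKVStoreConfig.sample_run_config(
        __distro_dir__="~/.llama/distributions/watsonx",
        db_name="registry.db",
    ),
    inference_store=SqliteSqlStoreConfig.sample_run_config(
        __distro_dir__="~/.llama/distributions/watsonx",
        db_name="inference_store.db",
    ),
)
```

Note that `DistributionTemplate.build_config()` now also folds each run config's `inference_store.pip_packages` into `additional_pip_packages`, which lines up with the `aiosqlite` and `sqlalchemy[asyncio]` entries added to the build.yaml files throughout this diff.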
diff --git a/llama_stack/templates/watsonx/watsonx.py b/llama_stack/templates/watsonx/watsonx.py new file mode 100644 index 000000000..802aaf8f1 --- /dev/null +++ b/llama_stack/templates/watsonx/watsonx.py @@ -0,0 +1,104 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from pathlib import Path + +from llama_stack.apis.models.models import ModelType +from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput +from llama_stack.providers.inline.inference.sentence_transformers import ( + SentenceTransformersInferenceConfig, +) +from llama_stack.providers.remote.inference.watsonx import WatsonXConfig +from llama_stack.providers.remote.inference.watsonx.models import MODEL_ENTRIES +from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry + + +def get_distribution_template() -> DistributionTemplate: + providers = { + "inference": ["remote::watsonx", "inline::sentence-transformers"], + "vector_io": ["inline::faiss"], + "safety": ["inline::llama-guard"], + "agents": ["inline::meta-reference"], + "telemetry": ["inline::meta-reference"], + "eval": ["inline::meta-reference"], + "datasetio": ["remote::huggingface", "inline::localfs"], + "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"], + "tool_runtime": [ + "remote::brave-search", + "remote::tavily-search", + "inline::rag-runtime", + "remote::model-context-protocol", + ], + } + + inference_provider = Provider( + provider_id="watsonx", + provider_type="remote::watsonx", + config=WatsonXConfig.sample_run_config(), + ) + + embedding_provider = Provider( + provider_id="sentence-transformers", + provider_type="inline::sentence-transformers", + config=SentenceTransformersInferenceConfig.sample_run_config(), + ) + + available_models = { + "watsonx": MODEL_ENTRIES, + } + default_tool_groups = [ + ToolGroupInput( + toolgroup_id="builtin::websearch", + provider_id="tavily-search", + ), + ToolGroupInput( + toolgroup_id="builtin::rag", + provider_id="rag-runtime", + ), + ] + + embedding_model = ModelInput( + model_id="all-MiniLM-L6-v2", + provider_id="sentence-transformers", + model_type=ModelType.embedding, + metadata={ + "embedding_dimension": 384, + }, + ) + + default_models = get_model_registry(available_models) + return DistributionTemplate( + name="watsonx", + distro_type="remote_hosted", + description="Use watsonx for running LLM inference", + container_image=None, + template_path=Path(__file__).parent / "doc_template.md", + providers=providers, + available_models_by_provider=available_models, + run_configs={ + "run.yaml": RunConfigSettings( + provider_overrides={ + "inference": [inference_provider, embedding_provider], + }, + default_models=default_models + [embedding_model], + default_tool_groups=default_tool_groups, + ), + }, + run_config_env_vars={ + "LLAMASTACK_PORT": ( + "5001", + "Port for the Llama Stack distribution server", + ), + "WATSONX_API_KEY": ( + "", + "watsonx API Key", + ), + "WATSONX_PROJECT_ID": ( + "", + "watsonx Project ID", + ), + }, + ) diff --git a/llama_stack/ui/.gitignore b/llama_stack/ui/.gitignore new file mode 100644 index 000000000..5ef6a5207 --- /dev/null +++ b/llama_stack/ui/.gitignore @@ -0,0 +1,41 @@ +# See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 
+ +# dependencies +/node_modules +/.pnp +.pnp.* +.yarn/* +!.yarn/patches +!.yarn/plugins +!.yarn/releases +!.yarn/versions + +# testing +/coverage + +# next.js +/.next/ +/out/ + +# production +/build + +# misc +.DS_Store +*.pem + +# debug +npm-debug.log* +yarn-debug.log* +yarn-error.log* +.pnpm-debug.log* + +# env files (can opt-in for committing if needed) +.env* + +# vercel +.vercel + +# typescript +*.tsbuildinfo +next-env.d.ts diff --git a/llama_stack/ui/.prettierignore b/llama_stack/ui/.prettierignore new file mode 100644 index 000000000..1b8ac8894 --- /dev/null +++ b/llama_stack/ui/.prettierignore @@ -0,0 +1,3 @@ +# Ignore artifacts: +build +coverage diff --git a/llama_stack/ui/.prettierrc b/llama_stack/ui/.prettierrc new file mode 100644 index 000000000..0967ef424 --- /dev/null +++ b/llama_stack/ui/.prettierrc @@ -0,0 +1 @@ +{} diff --git a/llama_stack/ui/README.md b/llama_stack/ui/README.md new file mode 100644 index 000000000..b6f803509 --- /dev/null +++ b/llama_stack/ui/README.md @@ -0,0 +1,25 @@ +## This is WIP. + +We use shadcn/ui ([Shadcn UI](https://ui.shadcn.com/)) for the UI components. + +## Getting Started + +First, install dependencies: + +```bash +npm install +``` + +Then, run the development server: + +```bash +npm run dev +# or +yarn dev +# or +pnpm dev +# or +bun dev +``` + +Open [http://localhost:8322](http://localhost:8322) with your browser to see the result. diff --git a/llama_stack/ui/app/favicon.ico b/llama_stack/ui/app/favicon.ico new file mode 100644 index 000000000..718d6fea4 Binary files /dev/null and b/llama_stack/ui/app/favicon.ico differ diff --git a/llama_stack/ui/app/globals.css b/llama_stack/ui/app/globals.css new file mode 100644 index 000000000..dc98be74c --- /dev/null +++ b/llama_stack/ui/app/globals.css @@ -0,0 +1,122 @@ +@import "tailwindcss"; +@import "tw-animate-css"; + +@custom-variant dark (&:is(.dark *)); + +@theme inline { + --color-background: var(--background); + --color-foreground: var(--foreground); + --font-sans: var(--font-geist-sans); + --font-mono: var(--font-geist-mono); + --color-sidebar-ring: var(--sidebar-ring); + --color-sidebar-border: var(--sidebar-border); + --color-sidebar-accent-foreground: var(--sidebar-accent-foreground); + --color-sidebar-accent: var(--sidebar-accent); + --color-sidebar-primary-foreground: var(--sidebar-primary-foreground); + --color-sidebar-primary: var(--sidebar-primary); + --color-sidebar-foreground: var(--sidebar-foreground); + --color-sidebar: var(--sidebar); + --color-chart-5: var(--chart-5); + --color-chart-4: var(--chart-4); + --color-chart-3: var(--chart-3); + --color-chart-2: var(--chart-2); + --color-chart-1: var(--chart-1); + --color-ring: var(--ring); + --color-input: var(--input); + --color-border: var(--border); + --color-destructive: var(--destructive); + --color-accent-foreground: var(--accent-foreground); + --color-accent: var(--accent); + --color-muted-foreground: var(--muted-foreground); + --color-muted: var(--muted); + --color-secondary-foreground: var(--secondary-foreground); + --color-secondary: var(--secondary); + --color-primary-foreground: var(--primary-foreground); + --color-primary: var(--primary); + --color-popover-foreground: var(--popover-foreground); + --color-popover: var(--popover); + --color-card-foreground: var(--card-foreground); + --color-card: var(--card); + --radius-sm: calc(var(--radius) - 4px); + --radius-md: calc(var(--radius) - 2px); + --radius-lg: var(--radius); + --radius-xl: calc(var(--radius) + 4px); +} + +:root { + --radius: 0.625rem; + --background:
oklch(1 0 0); + --foreground: oklch(0.145 0 0); + --card: oklch(1 0 0); + --card-foreground: oklch(0.145 0 0); + --popover: oklch(1 0 0); + --popover-foreground: oklch(0.145 0 0); + --primary: oklch(0.205 0 0); + --primary-foreground: oklch(0.985 0 0); + --secondary: oklch(0.97 0 0); + --secondary-foreground: oklch(0.205 0 0); + --muted: oklch(0.97 0 0); + --muted-foreground: oklch(0.556 0 0); + --accent: oklch(0.97 0 0); + --accent-foreground: oklch(0.205 0 0); + --destructive: oklch(0.577 0.245 27.325); + --border: oklch(0.922 0 0); + --input: oklch(0.922 0 0); + --ring: oklch(0.708 0 0); + --chart-1: oklch(0.646 0.222 41.116); + --chart-2: oklch(0.6 0.118 184.704); + --chart-3: oklch(0.398 0.07 227.392); + --chart-4: oklch(0.828 0.189 84.429); + --chart-5: oklch(0.769 0.188 70.08); + --sidebar: oklch(0.985 0 0); + --sidebar-foreground: oklch(0.145 0 0); + --sidebar-primary: oklch(0.205 0 0); + --sidebar-primary-foreground: oklch(0.985 0 0); + --sidebar-accent: oklch(0.97 0 0); + --sidebar-accent-foreground: oklch(0.205 0 0); + --sidebar-border: oklch(0.922 0 0); + --sidebar-ring: oklch(0.708 0 0); +} + +.dark { + --background: oklch(0.145 0 0); + --foreground: oklch(0.985 0 0); + --card: oklch(0.205 0 0); + --card-foreground: oklch(0.985 0 0); + --popover: oklch(0.205 0 0); + --popover-foreground: oklch(0.985 0 0); + --primary: oklch(0.922 0 0); + --primary-foreground: oklch(0.205 0 0); + --secondary: oklch(0.269 0 0); + --secondary-foreground: oklch(0.985 0 0); + --muted: oklch(0.269 0 0); + --muted-foreground: oklch(0.708 0 0); + --accent: oklch(0.269 0 0); + --accent-foreground: oklch(0.985 0 0); + --destructive: oklch(0.704 0.191 22.216); + --border: oklch(1 0 0 / 10%); + --input: oklch(1 0 0 / 15%); + --ring: oklch(0.556 0 0); + --chart-1: oklch(0.488 0.243 264.376); + --chart-2: oklch(0.696 0.17 162.48); + --chart-3: oklch(0.769 0.188 70.08); + --chart-4: oklch(0.627 0.265 303.9); + --chart-5: oklch(0.645 0.246 16.439); + --sidebar: oklch(0.205 0 0); + --sidebar-foreground: oklch(0.985 0 0); + --sidebar-primary: oklch(0.488 0.243 264.376); + --sidebar-primary-foreground: oklch(0.985 0 0); + --sidebar-accent: oklch(0.269 0 0); + --sidebar-accent-foreground: oklch(0.985 0 0); + --sidebar-border: oklch(1 0 0 / 10%); + --sidebar-ring: oklch(0.556 0 0); +} + +@layer base { + * { + @apply border-border outline-ring/50; + } + body { + @apply bg-background text-foreground; + } +} diff --git a/llama_stack/ui/app/layout.tsx b/llama_stack/ui/app/layout.tsx new file mode 100644 index 000000000..ed8a6cd5d --- /dev/null +++ b/llama_stack/ui/app/layout.tsx @@ -0,0 +1,55 @@ +import type { Metadata } from "next"; +import { ThemeProvider } from "@/components/ui/theme-provider"; +import { Geist, Geist_Mono } from "next/font/google"; +import { ModeToggle } from "@/components/ui/mode-toggle"; +import "./globals.css"; + +const geistSans = Geist({ + variable: "--font-geist-sans", + subsets: ["latin"], +}); + +const geistMono = Geist_Mono({ + variable: "--font-geist-mono", + subsets: ["latin"], +}); + +export const metadata: Metadata = { + title: "Llama Stack", + description: "Llama Stack UI", +}; + +import { SidebarProvider, SidebarTrigger } from "@/components/ui/sidebar"; +import { AppSidebar } from "@/components/layout/app-sidebar"; + +export default function Layout({ children }: { children: React.ReactNode }) { + return ( + + + + + +
+ {/* Header with aligned elements */} +
+
+ +
+
+
+ +
+
+
{children}
+
+
+
+ + + ); +} diff --git a/llama_stack/ui/app/logs/chat-completions/[id]/page.tsx b/llama_stack/ui/app/logs/chat-completions/[id]/page.tsx new file mode 100644 index 000000000..e6feef363 --- /dev/null +++ b/llama_stack/ui/app/logs/chat-completions/[id]/page.tsx @@ -0,0 +1,58 @@ +"use client"; + +import { useEffect, useState } from "react"; +import { useParams } from "next/navigation"; +import { ChatCompletion } from "@/lib/types"; +import { ChatCompletionDetailView } from "@/components/chat-completions/chat-completion-detail"; +import { client } from "@/lib/client"; + +export default function ChatCompletionDetailPage() { + const params = useParams(); + const id = params.id as string; + + const [completionDetail, setCompletionDetail] = + useState(null); + const [isLoading, setIsLoading] = useState(true); + const [error, setError] = useState(null); + + useEffect(() => { + if (!id) { + setError(new Error("Completion ID is missing.")); + setIsLoading(false); + return; + } + + const fetchCompletionDetail = async () => { + setIsLoading(true); + setError(null); + setCompletionDetail(null); + try { + const response = await client.chat.completions.retrieve(id); + setCompletionDetail(response as ChatCompletion); + } catch (err) { + console.error( + `Error fetching chat completion detail for ID ${id}:`, + err, + ); + setError( + err instanceof Error + ? err + : new Error("Failed to fetch completion detail"), + ); + } finally { + setIsLoading(false); + } + }; + + fetchCompletionDetail(); + }, [id]); + + return ( + + ); +} diff --git a/llama_stack/ui/app/logs/chat-completions/layout.tsx b/llama_stack/ui/app/logs/chat-completions/layout.tsx new file mode 100644 index 000000000..f4dbfc782 --- /dev/null +++ b/llama_stack/ui/app/logs/chat-completions/layout.tsx @@ -0,0 +1,19 @@ +"use client"; + +import React from "react"; +import LogsLayout from "@/components/layout/logs-layout"; + +export default function ChatCompletionsLayout({ + children, +}: { + children: React.ReactNode; +}) { + return ( + + {children} + + ); +} diff --git a/llama_stack/ui/app/logs/chat-completions/page.tsx b/llama_stack/ui/app/logs/chat-completions/page.tsx new file mode 100644 index 000000000..5bbfcce94 --- /dev/null +++ b/llama_stack/ui/app/logs/chat-completions/page.tsx @@ -0,0 +1,51 @@ +"use client"; + +import { useEffect, useState } from "react"; +import { ChatCompletion } from "@/lib/types"; +import { ChatCompletionsTable } from "@/components/chat-completions/chat-completions-table"; +import { client } from "@/lib/client"; + +export default function ChatCompletionsPage() { + const [completions, setCompletions] = useState([]); + const [isLoading, setIsLoading] = useState(true); + const [error, setError] = useState(null); + + useEffect(() => { + const fetchCompletions = async () => { + setIsLoading(true); + setError(null); + try { + const response = await client.chat.completions.list(); + const data = Array.isArray(response) + ? response + : (response as { data: ChatCompletion[] }).data; + + if (Array.isArray(data)) { + setCompletions(data); + } else { + console.error("Unexpected response structure:", response); + setError(new Error("Unexpected response structure")); + setCompletions([]); + } + } catch (err) { + console.error("Error fetching chat completions:", err); + setError( + err instanceof Error ? 
err : new Error("Failed to fetch completions"), + ); + setCompletions([]); + } finally { + setIsLoading(false); + } + }; + + fetchCompletions(); + }, []); + + return ( + + ); +} diff --git a/llama_stack/ui/app/logs/responses/[id]/page.tsx b/llama_stack/ui/app/logs/responses/[id]/page.tsx new file mode 100644 index 000000000..efe6f0ff3 --- /dev/null +++ b/llama_stack/ui/app/logs/responses/[id]/page.tsx @@ -0,0 +1,125 @@ +"use client"; + +import { useEffect, useState } from "react"; +import { useParams } from "next/navigation"; +import type { ResponseObject } from "llama-stack-client/resources/responses/responses"; +import { OpenAIResponse, InputItemListResponse } from "@/lib/types"; +import { ResponseDetailView } from "@/components/responses/responses-detail"; +import { client } from "@/lib/client"; + +export default function ResponseDetailPage() { + const params = useParams(); + const id = params.id as string; + + const [responseDetail, setResponseDetail] = useState( + null, + ); + const [inputItems, setInputItems] = useState( + null, + ); + const [isLoading, setIsLoading] = useState(true); + const [isLoadingInputItems, setIsLoadingInputItems] = useState(true); + const [error, setError] = useState(null); + const [inputItemsError, setInputItemsError] = useState(null); + + // Helper function to convert ResponseObject to OpenAIResponse + const convertResponseObject = ( + responseData: ResponseObject, + ): OpenAIResponse => { + return { + id: responseData.id, + created_at: responseData.created_at, + model: responseData.model, + object: responseData.object, + status: responseData.status, + output: responseData.output as OpenAIResponse["output"], + input: [], // ResponseObject doesn't include input; component uses inputItems prop instead + error: responseData.error, + parallel_tool_calls: responseData.parallel_tool_calls, + previous_response_id: responseData.previous_response_id, + temperature: responseData.temperature, + top_p: responseData.top_p, + truncation: responseData.truncation, + user: responseData.user, + }; + }; + + useEffect(() => { + if (!id) { + setError(new Error("Response ID is missing.")); + setIsLoading(false); + return; + } + + const fetchResponseDetail = async () => { + setIsLoading(true); + setIsLoadingInputItems(true); + setError(null); + setInputItemsError(null); + setResponseDetail(null); + setInputItems(null); + + try { + const [responseResult, inputItemsResult] = await Promise.allSettled([ + client.responses.retrieve(id), + client.responses.inputItems.list(id, { order: "asc" }), + ]); + + // Handle response detail result + if (responseResult.status === "fulfilled") { + const convertedResponse = convertResponseObject(responseResult.value); + setResponseDetail(convertedResponse); + } else { + console.error( + `Error fetching response detail for ID ${id}:`, + responseResult.reason, + ); + setError( + responseResult.reason instanceof Error + ? responseResult.reason + : new Error("Failed to fetch response detail"), + ); + } + + // Handle input items result + if (inputItemsResult.status === "fulfilled") { + const inputItemsData = + inputItemsResult.value as unknown as InputItemListResponse; + setInputItems(inputItemsData); + } else { + console.error( + `Error fetching input items for response ID ${id}:`, + inputItemsResult.reason, + ); + setInputItemsError( + inputItemsResult.reason instanceof Error + ? 
inputItemsResult.reason + : new Error("Failed to fetch input items"), + ); + } + } catch (err) { + console.error(`Unexpected error fetching data for ID ${id}:`, err); + setError( + err instanceof Error ? err : new Error("Unexpected error occurred"), + ); + } finally { + setIsLoading(false); + setIsLoadingInputItems(false); + } + }; + + fetchResponseDetail(); + }, [id]); + + return ( + + ); +} diff --git a/llama_stack/ui/app/logs/responses/layout.tsx b/llama_stack/ui/app/logs/responses/layout.tsx new file mode 100644 index 000000000..1fe116e5e --- /dev/null +++ b/llama_stack/ui/app/logs/responses/layout.tsx @@ -0,0 +1,16 @@ +"use client"; + +import React from "react"; +import LogsLayout from "@/components/layout/logs-layout"; + +export default function ResponsesLayout({ + children, +}: { + children: React.ReactNode; +}) { + return ( + + {children} + + ); +} diff --git a/llama_stack/ui/app/logs/responses/page.tsx b/llama_stack/ui/app/logs/responses/page.tsx new file mode 100644 index 000000000..dab0c735f --- /dev/null +++ b/llama_stack/ui/app/logs/responses/page.tsx @@ -0,0 +1,66 @@ +"use client"; + +import { useEffect, useState } from "react"; +import type { ResponseListResponse } from "llama-stack-client/resources/responses/responses"; +import { OpenAIResponse } from "@/lib/types"; +import { ResponsesTable } from "@/components/responses/responses-table"; +import { client } from "@/lib/client"; + +export default function ResponsesPage() { + const [responses, setResponses] = useState([]); + const [isLoading, setIsLoading] = useState(true); + const [error, setError] = useState(null); + + // Helper function to convert ResponseListResponse.Data to OpenAIResponse + const convertResponseListData = ( + responseData: ResponseListResponse.Data, + ): OpenAIResponse => { + return { + id: responseData.id, + created_at: responseData.created_at, + model: responseData.model, + object: responseData.object, + status: responseData.status, + output: responseData.output as OpenAIResponse["output"], + input: responseData.input as OpenAIResponse["input"], + error: responseData.error, + parallel_tool_calls: responseData.parallel_tool_calls, + previous_response_id: responseData.previous_response_id, + temperature: responseData.temperature, + top_p: responseData.top_p, + truncation: responseData.truncation, + user: responseData.user, + }; + }; + + useEffect(() => { + const fetchResponses = async () => { + setIsLoading(true); + setError(null); + try { + const response = await client.responses.list(); + const responseListData = response as ResponseListResponse; + + const convertedResponses: OpenAIResponse[] = responseListData.data.map( + convertResponseListData, + ); + + setResponses(convertedResponses); + } catch (err) { + console.error("Error fetching responses:", err); + setError( + err instanceof Error ? err : new Error("Failed to fetch responses"), + ); + setResponses([]); + } finally { + setIsLoading(false); + } + }; + + fetchResponses(); + }, []); + + return ( + + ); +} diff --git a/llama_stack/ui/app/page.tsx b/llama_stack/ui/app/page.tsx new file mode 100644 index 000000000..d1d781bdb --- /dev/null +++ b/llama_stack/ui/app/page.tsx @@ -0,0 +1,7 @@ +export default function Home() { + return ( +
+    <div>
+      <h1>Welcome to Llama Stack!</h1>
+    </div>
+ ); +} diff --git a/llama_stack/ui/components.json b/llama_stack/ui/components.json new file mode 100644 index 000000000..4ee62ee10 --- /dev/null +++ b/llama_stack/ui/components.json @@ -0,0 +1,21 @@ +{ + "$schema": "https://ui.shadcn.com/schema.json", + "style": "new-york", + "rsc": true, + "tsx": true, + "tailwind": { + "config": "", + "css": "app/globals.css", + "baseColor": "neutral", + "cssVariables": true, + "prefix": "" + }, + "aliases": { + "components": "@/components", + "utils": "@/lib/utils", + "ui": "@/components/ui", + "lib": "@/lib", + "hooks": "@/hooks" + }, + "iconLibrary": "lucide" +} diff --git a/llama_stack/ui/components/chat-completions/chat-completion-detail.test.tsx b/llama_stack/ui/components/chat-completions/chat-completion-detail.test.tsx new file mode 100644 index 000000000..5348dbc3a --- /dev/null +++ b/llama_stack/ui/components/chat-completions/chat-completion-detail.test.tsx @@ -0,0 +1,193 @@ +import React from "react"; +import { render, screen } from "@testing-library/react"; +import "@testing-library/jest-dom"; +import { ChatCompletionDetailView } from "./chat-completion-detail"; +import { ChatCompletion } from "@/lib/types"; + +// Initial test file setup for ChatCompletionDetailView + +describe("ChatCompletionDetailView", () => { + test("renders skeleton UI when isLoading is true", () => { + const { container } = render( + , + ); + // Use the data-slot attribute for Skeletons + const skeletons = container.querySelectorAll('[data-slot="skeleton"]'); + expect(skeletons.length).toBeGreaterThan(0); + }); + + test("renders error message when error prop is provided", () => { + render( + , + ); + expect( + screen.getByText(/Error loading details for ID err-id: Network Error/), + ).toBeInTheDocument(); + }); + + test("renders default error message when error.message is empty", () => { + render( + , + ); + // Use regex to match the error message regardless of whitespace + expect( + screen.getByText(/Error loading details for ID\s*err-id\s*:/), + ).toBeInTheDocument(); + }); + + test("renders error message when error prop is an object without message", () => { + render( + , + ); + // Use regex to match the error message regardless of whitespace + expect( + screen.getByText(/Error loading details for ID\s*err-id\s*:/), + ).toBeInTheDocument(); + }); + + test("renders not found message when completion is null and not loading/error", () => { + render( + , + ); + expect( + screen.getByText("No details found for ID: notfound-id."), + ).toBeInTheDocument(); + }); + + test("renders input, output, and properties for valid completion", () => { + const mockCompletion: ChatCompletion = { + id: "comp_123", + object: "chat.completion", + created: 1710000000, + model: "llama-test-model", + choices: [ + { + index: 0, + message: { role: "assistant", content: "Test output" }, + finish_reason: "stop", + }, + ], + input_messages: [{ role: "user", content: "Test input" }], + }; + render( + , + ); + // Input + expect(screen.getByText("Input")).toBeInTheDocument(); + expect(screen.getByText("Test input")).toBeInTheDocument(); + // Output + expect(screen.getByText("Output")).toBeInTheDocument(); + expect(screen.getByText("Test output")).toBeInTheDocument(); + // Properties + expect(screen.getByText("Properties")).toBeInTheDocument(); + expect(screen.getByText("Created:")).toBeInTheDocument(); + expect( + screen.getByText(new Date(1710000000 * 1000).toLocaleString()), + ).toBeInTheDocument(); + expect(screen.getByText("ID:")).toBeInTheDocument(); + 
expect(screen.getByText("comp_123")).toBeInTheDocument(); + expect(screen.getByText("Model:")).toBeInTheDocument(); + expect(screen.getByText("llama-test-model")).toBeInTheDocument(); + expect(screen.getByText("Finish Reason:")).toBeInTheDocument(); + expect(screen.getByText("stop")).toBeInTheDocument(); + }); + + test("renders tool call in output and properties when present", () => { + const toolCall = { + function: { name: "search", arguments: '{"query":"llama"}' }, + }; + const mockCompletion: ChatCompletion = { + id: "comp_tool", + object: "chat.completion", + created: 1710001000, + model: "llama-tool-model", + choices: [ + { + index: 0, + message: { + role: "assistant", + content: "Tool output", + tool_calls: [toolCall], + }, + finish_reason: "stop", + }, + ], + input_messages: [{ role: "user", content: "Tool input" }], + }; + render( + , + ); + // Output should include the tool call block (should be present twice: input and output) + const toolCallLabels = screen.getAllByText("Tool Call"); + expect(toolCallLabels.length).toBeGreaterThanOrEqual(1); // At least one, but could be two + // The tool call block should contain the formatted tool call string in both input and output + const toolCallBlocks = screen.getAllByText('search({"query":"llama"})'); + expect(toolCallBlocks.length).toBe(2); + // Properties should include the tool call name + expect(screen.getByText("Functions/Tools Called:")).toBeInTheDocument(); + expect(screen.getByText("search")).toBeInTheDocument(); + }); + + test("handles missing/empty fields gracefully", () => { + const mockCompletion: ChatCompletion = { + id: "comp_edge", + object: "chat.completion", + created: 1710002000, + model: "llama-edge-model", + choices: [], // No choices + input_messages: [], // No input messages + }; + render( + , + ); + // Input section should be present but empty + expect(screen.getByText("Input")).toBeInTheDocument(); + // Output section should show fallback message + expect( + screen.getByText("No message found in assistant's choice."), + ).toBeInTheDocument(); + // Properties should show N/A for finish reason + expect(screen.getByText("Finish Reason:")).toBeInTheDocument(); + expect(screen.getByText("N/A")).toBeInTheDocument(); + }); +}); diff --git a/llama_stack/ui/components/chat-completions/chat-completion-detail.tsx b/llama_stack/ui/components/chat-completions/chat-completion-detail.tsx new file mode 100644 index 000000000..200807864 --- /dev/null +++ b/llama_stack/ui/components/chat-completions/chat-completion-detail.tsx @@ -0,0 +1,145 @@ +"use client"; + +import { ChatMessage, ChatCompletion } from "@/lib/types"; +import { ChatMessageItem } from "@/components/chat-completions/chat-messasge-item"; +import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card"; +import { + DetailLoadingView, + DetailErrorView, + DetailNotFoundView, + DetailLayout, + PropertiesCard, + PropertyItem, +} from "@/components/layout/detail-layout"; + +interface ChatCompletionDetailViewProps { + completion: ChatCompletion | null; + isLoading: boolean; + error: Error | null; + id: string; +} + +export function ChatCompletionDetailView({ + completion, + isLoading, + error, + id, +}: ChatCompletionDetailViewProps) { + const title = "Chat Completion Details"; + + if (error) { + return ; + } + + if (isLoading) { + return ; + } + + if (!completion) { + return ; + } + + // Main content cards + const mainContent = ( + <> + + + Input + + + {completion.input_messages?.map((msg, index) => ( + + ))} + 
{completion.choices?.[0]?.message?.tool_calls && + Array.isArray(completion.choices[0].message.tool_calls) && + !completion.input_messages?.some( + (im) => + im.role === "assistant" && + im.tool_calls && + Array.isArray(im.tool_calls) && + im.tool_calls.length > 0, + ) + ? completion.choices[0].message.tool_calls.map( + (toolCall: any, index: number) => { + const assistantToolCallMessage: ChatMessage = { + role: "assistant", + tool_calls: [toolCall], + content: "", // Ensure content is defined, even if empty + }; + return ( + + ); + }, + ) + : null} + + + + + + Output + + + {completion.choices?.[0]?.message ? ( + + ) : ( +

+ No message found in assistant's choice. +

+ )} +
+
+ + ); + + // Properties sidebar + const sidebar = ( + + + + + + {(() => { + const toolCalls = completion.choices?.[0]?.message?.tool_calls; + if (toolCalls && Array.isArray(toolCalls) && toolCalls.length > 0) { + return ( + +
    + {toolCalls.map((toolCall: any, index: number) => ( +
  • + + {toolCall.function?.name || "N/A"} + +
  • + ))} +
+ + } + hasBorder + /> + ); + } + return null; + })()} +
+ ); + + return ( + + ); +} diff --git a/llama_stack/ui/components/chat-completions/chat-completion-table.test.tsx b/llama_stack/ui/components/chat-completions/chat-completion-table.test.tsx new file mode 100644 index 000000000..c8a55b100 --- /dev/null +++ b/llama_stack/ui/components/chat-completions/chat-completion-table.test.tsx @@ -0,0 +1,347 @@ +import React from "react"; +import { render, screen, fireEvent } from "@testing-library/react"; +import "@testing-library/jest-dom"; +import { ChatCompletionsTable } from "./chat-completions-table"; +import { ChatCompletion } from "@/lib/types"; + +// Mock next/navigation +const mockPush = jest.fn(); +jest.mock("next/navigation", () => ({ + useRouter: () => ({ + push: mockPush, + }), +})); + +// Mock helper functions +jest.mock("@/lib/truncate-text"); +jest.mock("@/lib/format-message-content"); + +// Import the mocked functions to set up default or specific implementations +import { truncateText as originalTruncateText } from "@/lib/truncate-text"; +import { + extractTextFromContentPart as originalExtractTextFromContentPart, + extractDisplayableText as originalExtractDisplayableText, +} from "@/lib/format-message-content"; + +// Cast to jest.Mock for typings +const truncateText = originalTruncateText as jest.Mock; +const extractTextFromContentPart = + originalExtractTextFromContentPart as jest.Mock; +const extractDisplayableText = originalExtractDisplayableText as jest.Mock; + +describe("ChatCompletionsTable", () => { + const defaultProps = { + data: [] as ChatCompletion[], + isLoading: false, + error: null, + }; + + beforeEach(() => { + // Reset all mocks before each test + mockPush.mockClear(); + truncateText.mockClear(); + extractTextFromContentPart.mockClear(); + extractDisplayableText.mockClear(); + + // Default pass-through implementations + truncateText.mockImplementation((text: string | undefined) => text); + extractTextFromContentPart.mockImplementation((content: unknown) => + typeof content === "string" ? 
content : "extracted text", + ); + extractDisplayableText.mockImplementation( + (message: unknown) => + (message as { content?: string })?.content || "extracted output", + ); + }); + + test("renders without crashing with default props", () => { + render(); + expect(screen.getByText("No chat completions found.")).toBeInTheDocument(); + }); + + test("click on a row navigates to the correct URL", () => { + const mockCompletion: ChatCompletion = { + id: "comp_123", + object: "chat.completion", + created: Math.floor(Date.now() / 1000), + model: "llama-test-model", + choices: [ + { + index: 0, + message: { role: "assistant", content: "Test output" }, + finish_reason: "stop", + }, + ], + input_messages: [{ role: "user", content: "Test input" }], + }; + + // Set up mocks to return expected values + extractTextFromContentPart.mockReturnValue("Test input"); + extractDisplayableText.mockReturnValue("Test output"); + + render(); + + const row = screen.getByText("Test input").closest("tr"); + if (row) { + fireEvent.click(row); + expect(mockPush).toHaveBeenCalledWith("/logs/chat-completions/comp_123"); + } else { + throw new Error('Row with "Test input" not found for router mock test.'); + } + }); + + describe("Loading State", () => { + test("renders skeleton UI when isLoading is true", () => { + const { container } = render( + , + ); + + // Check for skeleton in the table caption + const tableCaption = container.querySelector("caption"); + expect(tableCaption).toBeInTheDocument(); + if (tableCaption) { + const captionSkeleton = tableCaption.querySelector( + '[data-slot="skeleton"]', + ); + expect(captionSkeleton).toBeInTheDocument(); + } + + // Check for skeletons in the table body cells + const tableBody = container.querySelector("tbody"); + expect(tableBody).toBeInTheDocument(); + if (tableBody) { + const bodySkeletons = tableBody.querySelectorAll( + '[data-slot="skeleton"]', + ); + expect(bodySkeletons.length).toBeGreaterThan(0); + } + }); + }); + + describe("Error State", () => { + test("renders error message when error prop is provided", () => { + const errorMessage = "Network Error"; + render( + , + ); + expect( + screen.getByText(`Error fetching data: ${errorMessage}`), + ).toBeInTheDocument(); + }); + + test("renders default error message when error.message is not available", () => { + render( + , + ); + expect( + screen.getByText("Error fetching data: An unknown error occurred"), + ).toBeInTheDocument(); + }); + + test("renders default error message when error prop is an object without message", () => { + render(); + expect( + screen.getByText("Error fetching data: An unknown error occurred"), + ).toBeInTheDocument(); + }); + }); + + describe("Empty State", () => { + test('renders "No chat completions found." 
and no table when data array is empty', () => { + render(); + expect( + screen.getByText("No chat completions found."), + ).toBeInTheDocument(); + + // Ensure that the table structure is NOT rendered in the empty state + const table = screen.queryByRole("table"); + expect(table).not.toBeInTheDocument(); + }); + }); + + describe("Data Rendering", () => { + test("renders table caption, headers, and completion data correctly", () => { + const mockCompletions = [ + { + id: "comp_1", + object: "chat.completion", + created: 1710000000, + model: "llama-test-model", + choices: [ + { + index: 0, + message: { role: "assistant", content: "Test output" }, + finish_reason: "stop", + }, + ], + input_messages: [{ role: "user", content: "Test input" }], + }, + { + id: "comp_2", + object: "chat.completion", + created: 1710001000, + model: "llama-another-model", + choices: [ + { + index: 0, + message: { role: "assistant", content: "Another output" }, + finish_reason: "stop", + }, + ], + input_messages: [{ role: "user", content: "Another input" }], + }, + ]; + + // Set up mocks to return expected values + extractTextFromContentPart.mockImplementation((content: unknown) => { + if (content === "Test input") return "Test input"; + if (content === "Another input") return "Another input"; + return "extracted text"; + }); + extractDisplayableText.mockImplementation((message: unknown) => { + const msg = message as { content?: string }; + if (msg?.content === "Test output") return "Test output"; + if (msg?.content === "Another output") return "Another output"; + return "extracted output"; + }); + + render( + , + ); + + // Table caption + expect( + screen.getByText("A list of your recent chat completions."), + ).toBeInTheDocument(); + + // Table headers + expect(screen.getByText("Input")).toBeInTheDocument(); + expect(screen.getByText("Output")).toBeInTheDocument(); + expect(screen.getByText("Model")).toBeInTheDocument(); + expect(screen.getByText("Created")).toBeInTheDocument(); + + // Data rows + expect(screen.getByText("Test input")).toBeInTheDocument(); + expect(screen.getByText("Test output")).toBeInTheDocument(); + expect(screen.getByText("llama-test-model")).toBeInTheDocument(); + expect( + screen.getByText(new Date(1710000000 * 1000).toLocaleString()), + ).toBeInTheDocument(); + + expect(screen.getByText("Another input")).toBeInTheDocument(); + expect(screen.getByText("Another output")).toBeInTheDocument(); + expect(screen.getByText("llama-another-model")).toBeInTheDocument(); + expect( + screen.getByText(new Date(1710001000 * 1000).toLocaleString()), + ).toBeInTheDocument(); + }); + }); + + describe("Text Truncation and Content Extraction", () => { + test("truncates long input and output text", () => { + // Specific mock implementation for this test + truncateText.mockImplementation( + (text: string | undefined, maxLength?: number) => { + const defaultTestMaxLength = 10; + const effectiveMaxLength = maxLength ?? defaultTestMaxLength; + return typeof text === "string" && text.length > effectiveMaxLength + ? text.slice(0, effectiveMaxLength) + "..." 
+ : text; + }, + ); + + const longInput = + "This is a very long input message that should be truncated."; + const longOutput = + "This is a very long output message that should also be truncated."; + + extractTextFromContentPart.mockReturnValue(longInput); + extractDisplayableText.mockReturnValue(longOutput); + + const mockCompletions = [ + { + id: "comp_trunc", + object: "chat.completion", + created: 1710002000, + model: "llama-trunc-model", + choices: [ + { + index: 0, + message: { role: "assistant", content: longOutput }, + finish_reason: "stop", + }, + ], + input_messages: [{ role: "user", content: longInput }], + }, + ]; + + render( + , + ); + + // The truncated text should be present for both input and output + const truncatedTexts = screen.getAllByText( + longInput.slice(0, 10) + "...", + ); + expect(truncatedTexts.length).toBe(2); // one for input, one for output + truncatedTexts.forEach((textElement) => + expect(textElement).toBeInTheDocument(), + ); + }); + + test("uses content extraction functions correctly", () => { + const mockCompletion = { + id: "comp_extract", + object: "chat.completion", + created: 1710003000, + model: "llama-extract-model", + choices: [ + { + index: 0, + message: { role: "assistant", content: "Extracted output" }, + finish_reason: "stop", + }, + ], + input_messages: [{ role: "user", content: "Extracted input" }], + }; + + extractTextFromContentPart.mockReturnValue("Extracted input"); + extractDisplayableText.mockReturnValue("Extracted output"); + + render( + , + ); + + // Verify the extraction functions were called + expect(extractTextFromContentPart).toHaveBeenCalledWith( + "Extracted input", + ); + expect(extractDisplayableText).toHaveBeenCalledWith({ + role: "assistant", + content: "Extracted output", + }); + + // Verify the extracted content is displayed + expect(screen.getByText("Extracted input")).toBeInTheDocument(); + expect(screen.getByText("Extracted output")).toBeInTheDocument(); + }); + }); +}); diff --git a/llama_stack/ui/components/chat-completions/chat-completions-table.tsx b/llama_stack/ui/components/chat-completions/chat-completions-table.tsx new file mode 100644 index 000000000..5f1d2f03d --- /dev/null +++ b/llama_stack/ui/components/chat-completions/chat-completions-table.tsx @@ -0,0 +1,43 @@ +"use client"; + +import { ChatCompletion } from "@/lib/types"; +import { LogsTable, LogTableRow } from "@/components/logs/logs-table"; +import { + extractTextFromContentPart, + extractDisplayableText, +} from "@/lib/format-message-content"; + +interface ChatCompletionsTableProps { + data: ChatCompletion[]; + isLoading: boolean; + error: Error | null; +} + +function formatChatCompletionToRow(completion: ChatCompletion): LogTableRow { + return { + id: completion.id, + input: extractTextFromContentPart(completion.input_messages?.[0]?.content), + output: extractDisplayableText(completion.choices?.[0]?.message), + model: completion.model, + createdTime: new Date(completion.created * 1000).toLocaleString(), + detailPath: `/logs/chat-completions/${completion.id}`, + }; +} + +export function ChatCompletionsTable({ + data, + isLoading, + error, +}: ChatCompletionsTableProps) { + const formattedData = data.map(formatChatCompletionToRow); + + return ( + + ); +} diff --git a/llama_stack/ui/components/chat-completions/chat-messasge-item.tsx b/llama_stack/ui/components/chat-completions/chat-messasge-item.tsx new file mode 100644 index 000000000..2e8593bfb --- /dev/null +++ b/llama_stack/ui/components/chat-completions/chat-messasge-item.tsx @@ -0,0 +1,76 @@ 
+"use client"; + +import { ChatMessage } from "@/lib/types"; +import React from "react"; +import { formatToolCallToString } from "@/lib/format-tool-call"; +import { extractTextFromContentPart } from "@/lib/format-message-content"; +import { + MessageBlock, + ToolCallBlock, +} from "@/components/ui/message-components"; + +interface ChatMessageItemProps { + message: ChatMessage; +} +export function ChatMessageItem({ message }: ChatMessageItemProps) { + switch (message.role) { + case "system": + return ( + + ); + case "user": + return ( + + ); + + case "assistant": + if ( + message.tool_calls && + Array.isArray(message.tool_calls) && + message.tool_calls.length > 0 + ) { + return ( + <> + {message.tool_calls.map((toolCall: any, index: number) => { + const formattedToolCall = formatToolCallToString(toolCall); + const toolCallContent = ( + + {formattedToolCall || "Error: Could not display tool call"} + + ); + return ( + + ); + })} + + ); + } else { + return ( + + ); + } + case "tool": + const toolOutputContent = ( + + {extractTextFromContentPart(message.content)} + + ); + return ( + + ); + } + return null; +} diff --git a/llama_stack/ui/components/layout/app-sidebar.tsx b/llama_stack/ui/components/layout/app-sidebar.tsx new file mode 100644 index 000000000..1c53d6cc5 --- /dev/null +++ b/llama_stack/ui/components/layout/app-sidebar.tsx @@ -0,0 +1,82 @@ +"use client"; + +import { MessageSquareText, MessagesSquare, MoveUpRight } from "lucide-react"; +import Link from "next/link"; +import { usePathname } from "next/navigation"; +import { cn } from "@/lib/utils"; + +import { + Sidebar, + SidebarContent, + SidebarGroup, + SidebarGroupContent, + SidebarGroupLabel, + SidebarMenu, + SidebarMenuButton, + SidebarMenuItem, + SidebarHeader, +} from "@/components/ui/sidebar"; + +const logItems = [ + { + title: "Chat Completions", + url: "/logs/chat-completions", + icon: MessageSquareText, + }, + { + title: "Responses", + url: "/logs/responses", + icon: MessagesSquare, + }, + { + title: "Documentation", + url: "https://llama-stack.readthedocs.io/en/latest/references/api_reference/index.html", + icon: MoveUpRight, + }, +]; + +export function AppSidebar() { + const pathname = usePathname(); + + return ( + + + Llama Stack + + + + Logs + + + {logItems.map((item) => { + const isActive = pathname.startsWith(item.url); + return ( + + + + + {item.title} + + + + ); + })} + + + + + + ); +} diff --git a/llama_stack/ui/components/layout/detail-layout.tsx b/llama_stack/ui/components/layout/detail-layout.tsx new file mode 100644 index 000000000..58b912703 --- /dev/null +++ b/llama_stack/ui/components/layout/detail-layout.tsx @@ -0,0 +1,141 @@ +import React from "react"; +import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card"; +import { Skeleton } from "@/components/ui/skeleton"; + +export function DetailLoadingView({ title }: { title: string }) { + return ( + <> + {/* Title Skeleton */} +
+
+ {[...Array(2)].map((_, i) => ( + + + + + + + + + + + + + ))} +
+
+
+ {" "} + {/* Properties Title Skeleton */} + {[...Array(5)].map((_, i) => ( +
+ + +
+ ))} +
+
+
+ + ); +} + +export function DetailErrorView({ + title, + id, + error, +}: { + title: string; + id: string; + error: Error; +}) { + return ( + <> +

+      <h1>{title}</h1>
+      <p>
+        Error loading details for ID {id}: {error.message}
+      </p>
+    </>
+ + ); +} + +export function DetailNotFoundView({ + title, + id, +}: { + title: string; + id: string; +}) { + return ( + <> +

+      <h1>{title}</h1>
+      <p>No details found for ID: {id}.</p>
+    </>
+ + ); +} + +export interface PropertyItemProps { + label: string; + value: React.ReactNode; + className?: string; + hasBorder?: boolean; +} + +export function PropertyItem({ + label, + value, + className = "", + hasBorder = false, +}: PropertyItemProps) { + return ( +
  • + {label}:{" "} + {typeof value === "string" || typeof value === "number" ? ( + {value} + ) : ( + value + )} +
  • + ); +} + +export interface PropertiesCardProps { + children: React.ReactNode; +} + +export function PropertiesCard({ children }: PropertiesCardProps) { + return ( + + + Properties + + +
      {children}
    +
    +
    + ); +} + +export interface DetailLayoutProps { + title: string; + mainContent: React.ReactNode; + sidebar: React.ReactNode; +} + +export function DetailLayout({ + title, + mainContent, + sidebar, +}: DetailLayoutProps) { + return ( + <> +

    {title}

    +
    +
    {mainContent}
    +
    {sidebar}
    +
    + + ); +} diff --git a/llama_stack/ui/components/layout/logs-layout.tsx b/llama_stack/ui/components/layout/logs-layout.tsx new file mode 100644 index 000000000..468ad6e9a --- /dev/null +++ b/llama_stack/ui/components/layout/logs-layout.tsx @@ -0,0 +1,49 @@ +"use client"; + +import React from "react"; +import { usePathname, useParams } from "next/navigation"; +import { + PageBreadcrumb, + BreadcrumbSegment, +} from "@/components/layout/page-breadcrumb"; +import { truncateText } from "@/lib/truncate-text"; + +interface LogsLayoutProps { + children: React.ReactNode; + sectionLabel: string; + basePath: string; +} + +export default function LogsLayout({ + children, + sectionLabel, + basePath, +}: LogsLayoutProps) { + const pathname = usePathname(); + const params = useParams(); + + let segments: BreadcrumbSegment[] = []; + + if (pathname === basePath) { + segments = [{ label: sectionLabel }]; + } + + const idParam = params?.id; + if (idParam && typeof idParam === "string") { + segments = [ + { label: sectionLabel, href: basePath }, + { label: `Details (${truncateText(idParam, 20)})` }, + ]; + } + + return ( +
+    <>
+      {segments.length > 0 && (
+        <PageBreadcrumb segments={segments} />
+      )}
+      {children}
+    </>
    + ); +} diff --git a/llama_stack/ui/components/layout/page-breadcrumb.tsx b/llama_stack/ui/components/layout/page-breadcrumb.tsx new file mode 100644 index 000000000..fdb561d68 --- /dev/null +++ b/llama_stack/ui/components/layout/page-breadcrumb.tsx @@ -0,0 +1,49 @@ +"use client"; + +import Link from "next/link"; +import React from "react"; +import { + Breadcrumb, + BreadcrumbItem, + BreadcrumbLink, + BreadcrumbList, + BreadcrumbPage, + BreadcrumbSeparator, +} from "@/components/ui/breadcrumb"; + +export interface BreadcrumbSegment { + label: string; + href?: string; +} + +interface PageBreadcrumbProps { + segments: BreadcrumbSegment[]; + className?: string; +} + +export function PageBreadcrumb({ segments, className }: PageBreadcrumbProps) { + if (!segments || segments.length === 0) { + return null; + } + + return ( + + + {segments.map((segment, index) => ( + + + {segment.href ? ( + + {segment.label} + + ) : ( + {segment.label} + )} + + {index < segments.length - 1 && } + + ))} + + + ); +} diff --git a/llama_stack/ui/components/logs/logs-table.test.tsx b/llama_stack/ui/components/logs/logs-table.test.tsx new file mode 100644 index 000000000..88263b2fc --- /dev/null +++ b/llama_stack/ui/components/logs/logs-table.test.tsx @@ -0,0 +1,350 @@ +import React from "react"; +import { render, screen, fireEvent } from "@testing-library/react"; +import "@testing-library/jest-dom"; +import { LogsTable, LogTableRow } from "./logs-table"; + +// Mock next/navigation +const mockPush = jest.fn(); +jest.mock("next/navigation", () => ({ + useRouter: () => ({ + push: mockPush, + }), +})); + +// Mock helper functions +jest.mock("@/lib/truncate-text"); + +// Import the mocked functions +import { truncateText as originalTruncateText } from "@/lib/truncate-text"; + +// Cast to jest.Mock for typings +const truncateText = originalTruncateText as jest.Mock; + +describe("LogsTable", () => { + const defaultProps = { + data: [] as LogTableRow[], + isLoading: false, + error: null, + caption: "Test table caption", + emptyMessage: "No data found", + }; + + beforeEach(() => { + // Reset all mocks before each test + mockPush.mockClear(); + truncateText.mockClear(); + + // Default pass-through implementation + truncateText.mockImplementation((text: string | undefined) => text); + }); + + test("renders without crashing with default props", () => { + render(); + expect(screen.getByText("No data found")).toBeInTheDocument(); + }); + + test("click on a row navigates to the correct URL", () => { + const mockData: LogTableRow[] = [ + { + id: "row_123", + input: "Test input", + output: "Test output", + model: "test-model", + createdTime: "2024-01-01 12:00:00", + detailPath: "/test/path/row_123", + }, + ]; + + render(); + + const row = screen.getByText("Test input").closest("tr"); + if (row) { + fireEvent.click(row); + expect(mockPush).toHaveBeenCalledWith("/test/path/row_123"); + } else { + throw new Error('Row with "Test input" not found for router mock test.'); + } + }); + + describe("Loading State", () => { + test("renders skeleton UI when isLoading is true", () => { + const { container } = render( + , + ); + + // Check for skeleton in the table caption + const tableCaption = container.querySelector("caption"); + expect(tableCaption).toBeInTheDocument(); + if (tableCaption) { + const captionSkeleton = tableCaption.querySelector( + '[data-slot="skeleton"]', + ); + expect(captionSkeleton).toBeInTheDocument(); + } + + // Check for skeletons in the table body cells + const tableBody = container.querySelector("tbody"); + 
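+      // Note: the '[data-slot="skeleton"]' queries in this test assume the
+      // Skeleton component renders a data-slot="skeleton" attribute; if that
+      // markup changes, these loading-state assertions will need updating.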
expect(tableBody).toBeInTheDocument(); + if (tableBody) { + const bodySkeletons = tableBody.querySelectorAll( + '[data-slot="skeleton"]', + ); + expect(bodySkeletons.length).toBeGreaterThan(0); + } + + // Check that table headers are still rendered + expect(screen.getByText("Input")).toBeInTheDocument(); + expect(screen.getByText("Output")).toBeInTheDocument(); + expect(screen.getByText("Model")).toBeInTheDocument(); + expect(screen.getByText("Created")).toBeInTheDocument(); + }); + + test("renders correct number of skeleton rows", () => { + const { container } = render( + , + ); + + const skeletonRows = container.querySelectorAll("tbody tr"); + expect(skeletonRows.length).toBe(3); // Should render 3 skeleton rows + }); + }); + + describe("Error State", () => { + test("renders error message when error prop is provided", () => { + const errorMessage = "Network Error"; + render( + , + ); + expect( + screen.getByText(`Error fetching data: ${errorMessage}`), + ).toBeInTheDocument(); + }); + + test("renders default error message when error.message is not available", () => { + render( + , + ); + expect( + screen.getByText("Error fetching data: An unknown error occurred"), + ).toBeInTheDocument(); + }); + + test("renders default error message when error prop is an object without message", () => { + render(); + expect( + screen.getByText("Error fetching data: An unknown error occurred"), + ).toBeInTheDocument(); + }); + + test("does not render table when in error state", () => { + render( + , + ); + const table = screen.queryByRole("table"); + expect(table).not.toBeInTheDocument(); + }); + }); + + describe("Empty State", () => { + test("renders custom empty message when data array is empty", () => { + render( + , + ); + expect(screen.getByText("Custom empty message")).toBeInTheDocument(); + + // Ensure that the table structure is NOT rendered in the empty state + const table = screen.queryByRole("table"); + expect(table).not.toBeInTheDocument(); + }); + }); + + describe("Data Rendering", () => { + test("renders table caption, headers, and data correctly", () => { + const mockData: LogTableRow[] = [ + { + id: "row_1", + input: "First input", + output: "First output", + model: "model-1", + createdTime: "2024-01-01 12:00:00", + detailPath: "/path/1", + }, + { + id: "row_2", + input: "Second input", + output: "Second output", + model: "model-2", + createdTime: "2024-01-02 13:00:00", + detailPath: "/path/2", + }, + ]; + + render( + , + ); + + // Table caption + expect(screen.getByText("Custom table caption")).toBeInTheDocument(); + + // Table headers + expect(screen.getByText("Input")).toBeInTheDocument(); + expect(screen.getByText("Output")).toBeInTheDocument(); + expect(screen.getByText("Model")).toBeInTheDocument(); + expect(screen.getByText("Created")).toBeInTheDocument(); + + // Data rows + expect(screen.getByText("First input")).toBeInTheDocument(); + expect(screen.getByText("First output")).toBeInTheDocument(); + expect(screen.getByText("model-1")).toBeInTheDocument(); + expect(screen.getByText("2024-01-01 12:00:00")).toBeInTheDocument(); + + expect(screen.getByText("Second input")).toBeInTheDocument(); + expect(screen.getByText("Second output")).toBeInTheDocument(); + expect(screen.getByText("model-2")).toBeInTheDocument(); + expect(screen.getByText("2024-01-02 13:00:00")).toBeInTheDocument(); + }); + + test("applies correct CSS classes to table rows", () => { + const mockData: LogTableRow[] = [ + { + id: "row_1", + input: "Test input", + output: "Test output", + model: "test-model", + 
createdTime: "2024-01-01 12:00:00", + detailPath: "/test/path", + }, + ]; + + render(); + + const row = screen.getByText("Test input").closest("tr"); + expect(row).toHaveClass("cursor-pointer"); + expect(row).toHaveClass("hover:bg-muted/50"); + }); + + test("applies correct alignment to Created column", () => { + const mockData: LogTableRow[] = [ + { + id: "row_1", + input: "Test input", + output: "Test output", + model: "test-model", + createdTime: "2024-01-01 12:00:00", + detailPath: "/test/path", + }, + ]; + + render(); + + const createdCell = screen.getByText("2024-01-01 12:00:00").closest("td"); + expect(createdCell).toHaveClass("text-right"); + }); + }); + + describe("Text Truncation", () => { + test("truncates input and output text using truncateText function", () => { + // Mock truncateText to return truncated versions + truncateText.mockImplementation((text: string | undefined) => { + if (typeof text === "string" && text.length > 10) { + return text.slice(0, 10) + "..."; + } + return text; + }); + + const longInput = + "This is a very long input text that should be truncated"; + const longOutput = + "This is a very long output text that should be truncated"; + + const mockData: LogTableRow[] = [ + { + id: "row_1", + input: longInput, + output: longOutput, + model: "test-model", + createdTime: "2024-01-01 12:00:00", + detailPath: "/test/path", + }, + ]; + + render(); + + // Verify truncateText was called + expect(truncateText).toHaveBeenCalledWith(longInput); + expect(truncateText).toHaveBeenCalledWith(longOutput); + + // Verify truncated text is displayed + const truncatedTexts = screen.getAllByText("This is a ..."); + expect(truncatedTexts).toHaveLength(2); // one for input, one for output + truncatedTexts.forEach((textElement) => + expect(textElement).toBeInTheDocument(), + ); + }); + + test("does not truncate model names", () => { + const mockData: LogTableRow[] = [ + { + id: "row_1", + input: "Test input", + output: "Test output", + model: "very-long-model-name-that-should-not-be-truncated", + createdTime: "2024-01-01 12:00:00", + detailPath: "/test/path", + }, + ]; + + render(); + + // Model name should not be passed to truncateText + expect(truncateText).not.toHaveBeenCalledWith( + "very-long-model-name-that-should-not-be-truncated", + ); + + // Full model name should be displayed + expect( + screen.getByText("very-long-model-name-that-should-not-be-truncated"), + ).toBeInTheDocument(); + }); + }); + + describe("Accessibility", () => { + test("table has proper role and structure", () => { + const mockData: LogTableRow[] = [ + { + id: "row_1", + input: "Test input", + output: "Test output", + model: "test-model", + createdTime: "2024-01-01 12:00:00", + detailPath: "/test/path", + }, + ]; + + render(); + + const table = screen.getByRole("table"); + expect(table).toBeInTheDocument(); + + const columnHeaders = screen.getAllByRole("columnheader"); + expect(columnHeaders).toHaveLength(4); + + const rows = screen.getAllByRole("row"); + expect(rows).toHaveLength(2); // 1 header row + 1 data row + }); + }); +}); diff --git a/llama_stack/ui/components/logs/logs-table.tsx b/llama_stack/ui/components/logs/logs-table.tsx new file mode 100644 index 000000000..33afea61b --- /dev/null +++ b/llama_stack/ui/components/logs/logs-table.tsx @@ -0,0 +1,113 @@ +"use client"; + +import { useRouter } from "next/navigation"; +import { truncateText } from "@/lib/truncate-text"; +import { + Table, + TableBody, + TableCaption, + TableCell, + TableHead, + TableHeader, + TableRow, +} from 
"@/components/ui/table"; +import { Skeleton } from "@/components/ui/skeleton"; + +// Generic table row data interface +export interface LogTableRow { + id: string; + input: string; + output: string; + model: string; + createdTime: string; + detailPath: string; +} + +interface LogsTableProps { + data: LogTableRow[]; + isLoading: boolean; + error: Error | null; + caption: string; + emptyMessage: string; +} + +export function LogsTable({ + data, + isLoading, + error, + caption, + emptyMessage, +}: LogsTableProps) { + const router = useRouter(); + + const tableHeader = ( + + + Input + Output + Model + Created + + + ); + + if (isLoading) { + return ( + + + + + {tableHeader} + + {[...Array(3)].map((_, i) => ( + + + + + + + + + + + + + + + ))} + +
    + ); + } + + if (error) { + return ( +

    Error fetching data: {error.message || "An unknown error occurred"}

    + ); + } + + if (data.length === 0) { + return

    {emptyMessage}

    ; + } + + return ( + + {caption} + {tableHeader} + + {data.map((row) => ( + router.push(row.detailPath)} + className="cursor-pointer hover:bg-muted/50" + > + {truncateText(row.input)} + {truncateText(row.output)} + {row.model} + {row.createdTime} + + ))} + +
    + ); +} diff --git a/llama_stack/ui/components/responses/grouping/grouped-items-display.tsx b/llama_stack/ui/components/responses/grouping/grouped-items-display.tsx new file mode 100644 index 000000000..6ddc0eacc --- /dev/null +++ b/llama_stack/ui/components/responses/grouping/grouped-items-display.tsx @@ -0,0 +1,56 @@ +import { useFunctionCallGrouping } from "../hooks/function-call-grouping"; +import { ItemRenderer } from "../items/item-renderer"; +import { GroupedFunctionCallItemComponent } from "../items/grouped-function-call-item"; +import { + isFunctionCallItem, + isFunctionCallOutputItem, + AnyResponseItem, +} from "../utils/item-types"; + +interface GroupedItemsDisplayProps { + items: AnyResponseItem[]; + keyPrefix: string; + defaultRole?: string; +} + +export function GroupedItemsDisplay({ + items, + keyPrefix, + defaultRole = "unknown", +}: GroupedItemsDisplayProps) { + const groupedItems = useFunctionCallGrouping(items); + + return ( + <> + {groupedItems.map((groupedItem) => { + // If this is a function call with an output, render the grouped component + if ( + groupedItem.outputItem && + isFunctionCallItem(groupedItem.item) && + isFunctionCallOutputItem(groupedItem.outputItem) + ) { + return ( + + ); + } + + // Otherwise, render the individual item + return ( + + ); + })} + + ); +} diff --git a/llama_stack/ui/components/responses/hooks/function-call-grouping.ts b/llama_stack/ui/components/responses/hooks/function-call-grouping.ts new file mode 100644 index 000000000..2994354d5 --- /dev/null +++ b/llama_stack/ui/components/responses/hooks/function-call-grouping.ts @@ -0,0 +1,92 @@ +import { useMemo } from "react"; +import { + isFunctionCallOutputItem, + AnyResponseItem, + FunctionCallOutputItem, +} from "../utils/item-types"; + +export interface GroupedItem { + item: AnyResponseItem; + index: number; + outputItem?: AnyResponseItem; + outputIndex?: number; +} + +/** + * Hook to group function calls with their corresponding outputs + * @param items Array of items to group + * @returns Array of grouped items with their outputs + */ +export function useFunctionCallGrouping( + items: AnyResponseItem[], +): GroupedItem[] { + return useMemo(() => { + const groupedItems: GroupedItem[] = []; + const processedIndices = new Set(); + + // Build a map of call_id to indices for function_call_output items + const callIdToIndices = new Map(); + + for (let i = 0; i < items.length; i++) { + const item = items[i]; + if (isFunctionCallOutputItem(item)) { + if (!callIdToIndices.has(item.call_id)) { + callIdToIndices.set(item.call_id, []); + } + callIdToIndices.get(item.call_id)!.push(i); + } + } + + // Process items and group function calls with their outputs + for (let i = 0; i < items.length; i++) { + if (processedIndices.has(i)) { + continue; + } + + const currentItem = items[i]; + + if ( + currentItem.type === "function_call" && + "name" in currentItem && + "call_id" in currentItem + ) { + const functionCallId = currentItem.call_id as string; + let outputIndex = -1; + let outputItem: FunctionCallOutputItem | null = null; + + const relatedIndices = callIdToIndices.get(functionCallId) || []; + for (const idx of relatedIndices) { + const potentialOutput = items[idx]; + outputIndex = idx; + outputItem = potentialOutput as FunctionCallOutputItem; + break; + } + + if (outputItem && outputIndex !== -1) { + // Group function call with its function_call_output + groupedItems.push({ + item: currentItem, + index: i, + outputItem, + outputIndex, + }); + + // Mark both items as processed + 
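+          // Example: given items such as
+          //   { type: "function_call", call_id: "c1", name: "get_weather", arguments: "{}" }
+          //   { type: "function_call_output", call_id: "c1", output: "sunny" }
+          // both indices land in a single GroupedItem here, so the output is
+          // not rendered again later as a standalone item.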
processedIndices.add(i); + processedIndices.add(outputIndex); + + // Matching function call and output found, skip to next item + continue; + } + } + // render normally + groupedItems.push({ + item: currentItem, + index: i, + }); + processedIndices.add(i); + } + + return groupedItems; + }, [items]); +} diff --git a/llama_stack/ui/components/responses/items/function-call-item.tsx b/llama_stack/ui/components/responses/items/function-call-item.tsx new file mode 100644 index 000000000..beca935f0 --- /dev/null +++ b/llama_stack/ui/components/responses/items/function-call-item.tsx @@ -0,0 +1,29 @@ +import { + MessageBlock, + ToolCallBlock, +} from "@/components/ui/message-components"; +import { FunctionCallItem } from "../utils/item-types"; + +interface FunctionCallItemProps { + item: FunctionCallItem; + index: number; + keyPrefix: string; +} + +export function FunctionCallItemComponent({ + item, + index, + keyPrefix, +}: FunctionCallItemProps) { + const name = item.name || "unknown"; + const args = item.arguments || "{}"; + const formattedFunctionCall = `${name}(${args})`; + + return ( + {formattedFunctionCall}} + /> + ); +} diff --git a/llama_stack/ui/components/responses/items/generic-item.tsx b/llama_stack/ui/components/responses/items/generic-item.tsx new file mode 100644 index 000000000..6b6f56603 --- /dev/null +++ b/llama_stack/ui/components/responses/items/generic-item.tsx @@ -0,0 +1,37 @@ +import { + MessageBlock, + ToolCallBlock, +} from "@/components/ui/message-components"; +import { BaseItem } from "../utils/item-types"; + +interface GenericItemProps { + item: BaseItem; + index: number; + keyPrefix: string; +} + +export function GenericItemComponent({ + item, + index, + keyPrefix, +}: GenericItemProps) { + // Handle other types like function calls, tool outputs, etc. + const itemData = item as Record; + + const content = itemData.content + ? typeof itemData.content === "string" + ? itemData.content + : JSON.stringify(itemData.content, null, 2) + : JSON.stringify(itemData, null, 2); + + const label = keyPrefix === "input" ? "Input" : "Output"; + + return ( + {content}} + /> + ); +} diff --git a/llama_stack/ui/components/responses/items/grouped-function-call-item.tsx b/llama_stack/ui/components/responses/items/grouped-function-call-item.tsx new file mode 100644 index 000000000..ded0ced71 --- /dev/null +++ b/llama_stack/ui/components/responses/items/grouped-function-call-item.tsx @@ -0,0 +1,54 @@ +import { + MessageBlock, + ToolCallBlock, +} from "@/components/ui/message-components"; +import { FunctionCallItem, FunctionCallOutputItem } from "../utils/item-types"; + +interface GroupedFunctionCallItemProps { + functionCall: FunctionCallItem; + output: FunctionCallOutputItem; + index: number; + keyPrefix: string; +} + +export function GroupedFunctionCallItemComponent({ + functionCall, + output, + index, + keyPrefix, +}: GroupedFunctionCallItemProps) { + const name = functionCall.name || "unknown"; + const args = functionCall.arguments || "{}"; + + // Extract the output content from function_call_output + let outputContent = ""; + if (output.output) { + outputContent = + typeof output.output === "string" + ? output.output + : JSON.stringify(output.output); + } else { + outputContent = JSON.stringify(output, null, 2); + } + + const functionCallContent = ( +
    +
    + Arguments + {`${name}(${args})`} +
    +
    + Output + {outputContent} +
    +
    + ); + + return ( + + ); +} diff --git a/llama_stack/ui/components/responses/items/index.ts b/llama_stack/ui/components/responses/items/index.ts new file mode 100644 index 000000000..d7bcc2ea4 --- /dev/null +++ b/llama_stack/ui/components/responses/items/index.ts @@ -0,0 +1,6 @@ +export { MessageItemComponent } from "./message-item"; +export { FunctionCallItemComponent } from "./function-call-item"; +export { WebSearchItemComponent } from "./web-search-item"; +export { GenericItemComponent } from "./generic-item"; +export { GroupedFunctionCallItemComponent } from "./grouped-function-call-item"; +export { ItemRenderer } from "./item-renderer"; diff --git a/llama_stack/ui/components/responses/items/item-renderer.tsx b/llama_stack/ui/components/responses/items/item-renderer.tsx new file mode 100644 index 000000000..8f65d50c4 --- /dev/null +++ b/llama_stack/ui/components/responses/items/item-renderer.tsx @@ -0,0 +1,60 @@ +import { + isMessageItem, + isFunctionCallItem, + isWebSearchCallItem, + AnyResponseItem, +} from "../utils/item-types"; +import { MessageItemComponent } from "./message-item"; +import { FunctionCallItemComponent } from "./function-call-item"; +import { WebSearchItemComponent } from "./web-search-item"; +import { GenericItemComponent } from "./generic-item"; + +interface ItemRendererProps { + item: AnyResponseItem; + index: number; + keyPrefix: string; + defaultRole?: string; +} + +export function ItemRenderer({ + item, + index, + keyPrefix, + defaultRole = "unknown", +}: ItemRendererProps) { + if (isMessageItem(item)) { + return ( + + ); + } + + if (isFunctionCallItem(item)) { + return ( + + ); + } + + if (isWebSearchCallItem(item)) { + return ( + + ); + } + + // Fallback to generic item for unknown types + return ( + + ); +} diff --git a/llama_stack/ui/components/responses/items/message-item.tsx b/llama_stack/ui/components/responses/items/message-item.tsx new file mode 100644 index 000000000..532fddfaa --- /dev/null +++ b/llama_stack/ui/components/responses/items/message-item.tsx @@ -0,0 +1,41 @@ +import { MessageBlock } from "@/components/ui/message-components"; +import { MessageItem } from "../utils/item-types"; + +interface MessageItemProps { + item: MessageItem; + index: number; + keyPrefix: string; + defaultRole?: string; +} + +export function MessageItemComponent({ + item, + index, + keyPrefix, + defaultRole = "unknown", +}: MessageItemProps) { + let content = ""; + + if (typeof item.content === "string") { + content = item.content; + } else if (Array.isArray(item.content)) { + content = item.content + .map((c) => { + return c.type === "input_text" || c.type === "output_text" + ? 
c.text + : JSON.stringify(c); + }) + .join(" "); + } + + const role = item.role || defaultRole; + const label = role.charAt(0).toUpperCase() + role.slice(1); + + return ( + + ); +} diff --git a/llama_stack/ui/components/responses/items/web-search-item.tsx b/llama_stack/ui/components/responses/items/web-search-item.tsx new file mode 100644 index 000000000..aaa5741ce --- /dev/null +++ b/llama_stack/ui/components/responses/items/web-search-item.tsx @@ -0,0 +1,28 @@ +import { + MessageBlock, + ToolCallBlock, +} from "@/components/ui/message-components"; +import { WebSearchCallItem } from "../utils/item-types"; + +interface WebSearchItemProps { + item: WebSearchCallItem; + index: number; + keyPrefix: string; +} + +export function WebSearchItemComponent({ + item, + index, + keyPrefix, +}: WebSearchItemProps) { + const formattedWebSearch = `web_search_call(status: ${item.status})`; + + return ( + {formattedWebSearch}} + /> + ); +} diff --git a/llama_stack/ui/components/responses/responses-detail.test.tsx b/llama_stack/ui/components/responses/responses-detail.test.tsx new file mode 100644 index 000000000..f426dc059 --- /dev/null +++ b/llama_stack/ui/components/responses/responses-detail.test.tsx @@ -0,0 +1,777 @@ +import React from "react"; +import { render, screen } from "@testing-library/react"; +import "@testing-library/jest-dom"; +import { ResponseDetailView } from "./responses-detail"; +import { OpenAIResponse, InputItemListResponse } from "@/lib/types"; + +describe("ResponseDetailView", () => { + const defaultProps = { + response: null, + inputItems: null, + isLoading: false, + isLoadingInputItems: false, + error: null, + inputItemsError: null, + id: "test_id", + }; + + describe("Loading State", () => { + test("renders loading skeleton when isLoading is true", () => { + const { container } = render( + , + ); + + // Check for skeleton elements + const skeletons = container.querySelectorAll('[data-slot="skeleton"]'); + expect(skeletons.length).toBeGreaterThan(0); + + // The title is replaced by a skeleton when loading, so we shouldn't expect the text + }); + }); + + describe("Error State", () => { + test("renders error message when error prop is provided", () => { + const errorMessage = "Network Error"; + render( + , + ); + + expect(screen.getByText("Responses Details")).toBeInTheDocument(); + // The error message is split across elements, so we check for parts + expect( + screen.getByText(/Error loading details for ID/), + ).toBeInTheDocument(); + expect(screen.getByText(/test_id/)).toBeInTheDocument(); + expect(screen.getByText(/Network Error/)).toBeInTheDocument(); + }); + + test("renders default error message when error.message is not available", () => { + render( + , + ); + + expect( + screen.getByText(/Error loading details for ID/), + ).toBeInTheDocument(); + expect(screen.getByText(/test_id/)).toBeInTheDocument(); + }); + }); + + describe("Not Found State", () => { + test("renders not found message when response is null and not loading/error", () => { + render(); + + expect(screen.getByText("Responses Details")).toBeInTheDocument(); + // The message is split across elements + expect(screen.getByText(/No details found for ID:/)).toBeInTheDocument(); + expect(screen.getByText(/test_id/)).toBeInTheDocument(); + }); + }); + + describe("Response Data Rendering", () => { + const mockResponse: OpenAIResponse = { + id: "resp_123", + object: "response", + created_at: 1710000000, + model: "llama-test-model", + status: "completed", + output: [ + { + type: "message", + role: "assistant", + 
content: "Test response output", + }, + ], + input: [ + { + type: "message", + role: "user", + content: "Test input message", + }, + ], + temperature: 0.7, + top_p: 0.9, + parallel_tool_calls: true, + previous_response_id: "prev_resp_456", + }; + + test("renders response data with input and output sections", () => { + render(); + + // Check main sections + expect(screen.getByText("Responses Details")).toBeInTheDocument(); + expect(screen.getByText("Input")).toBeInTheDocument(); + expect(screen.getByText("Output")).toBeInTheDocument(); + + // Check input content + expect(screen.getByText("Test input message")).toBeInTheDocument(); + expect(screen.getByText("User")).toBeInTheDocument(); + + // Check output content + expect(screen.getByText("Test response output")).toBeInTheDocument(); + expect(screen.getByText("Assistant")).toBeInTheDocument(); + }); + + test("renders properties sidebar with all response metadata", () => { + render(); + + // Check properties - use regex to handle text split across elements + expect(screen.getByText(/Created/)).toBeInTheDocument(); + expect( + screen.getByText(new Date(1710000000 * 1000).toLocaleString()), + ).toBeInTheDocument(); + + // Check for the specific ID label (not Previous Response ID) + expect( + screen.getByText((content, element) => { + return element?.tagName === "STRONG" && content === "ID:"; + }), + ).toBeInTheDocument(); + expect(screen.getByText("resp_123")).toBeInTheDocument(); + + expect(screen.getByText(/Model/)).toBeInTheDocument(); + expect(screen.getByText("llama-test-model")).toBeInTheDocument(); + + expect(screen.getByText(/Status/)).toBeInTheDocument(); + expect(screen.getByText("completed")).toBeInTheDocument(); + + expect(screen.getByText(/Temperature/)).toBeInTheDocument(); + expect(screen.getByText("0.7")).toBeInTheDocument(); + + expect(screen.getByText(/Top P/)).toBeInTheDocument(); + expect(screen.getByText("0.9")).toBeInTheDocument(); + + expect(screen.getByText(/Parallel Tool Calls/)).toBeInTheDocument(); + expect(screen.getByText("Yes")).toBeInTheDocument(); + + expect(screen.getByText(/Previous Response ID/)).toBeInTheDocument(); + expect(screen.getByText("prev_resp_456")).toBeInTheDocument(); + }); + + test("handles optional properties correctly", () => { + const minimalResponse: OpenAIResponse = { + id: "resp_minimal", + object: "response", + created_at: 1710000000, + model: "test-model", + status: "completed", + output: [], + input: [], + }; + + render( + , + ); + + // Should show required properties + expect(screen.getByText("resp_minimal")).toBeInTheDocument(); + expect(screen.getByText("test-model")).toBeInTheDocument(); + expect(screen.getByText("completed")).toBeInTheDocument(); + + // Should not show optional properties + expect(screen.queryByText("Temperature")).not.toBeInTheDocument(); + expect(screen.queryByText("Top P")).not.toBeInTheDocument(); + expect(screen.queryByText("Parallel Tool Calls")).not.toBeInTheDocument(); + expect( + screen.queryByText("Previous Response ID"), + ).not.toBeInTheDocument(); + }); + + test("renders error information when response has error", () => { + const errorResponse: OpenAIResponse = { + ...mockResponse, + error: { + code: "invalid_request", + message: "The request was invalid", + }, + }; + + render(); + + // The error is shown in the properties sidebar, not as a separate "Error" label + expect( + screen.getByText("invalid_request: The request was invalid"), + ).toBeInTheDocument(); + }); + }); + + describe("Input Items Handling", () => { + const mockResponse: OpenAIResponse 
= { + id: "resp_123", + object: "response", + created_at: 1710000000, + model: "test-model", + status: "completed", + output: [{ type: "message", role: "assistant", content: "output" }], + input: [{ type: "message", role: "user", content: "fallback input" }], + }; + + test("shows loading state for input items", () => { + render( + , + ); + + // Check for skeleton loading in input items section + const { container } = render( + , + ); + + const skeletons = container.querySelectorAll('[data-slot="skeleton"]'); + expect(skeletons.length).toBeGreaterThan(0); + }); + + test("shows error message for input items with fallback", () => { + render( + , + ); + + expect( + screen.getByText( + "Error loading input items: Failed to load input items", + ), + ).toBeInTheDocument(); + expect( + screen.getByText("Falling back to response input data."), + ).toBeInTheDocument(); + + // Should still show fallback input data + expect(screen.getByText("fallback input")).toBeInTheDocument(); + }); + + test("uses input items data when available", () => { + const mockInputItems: InputItemListResponse = { + object: "list", + data: [ + { + type: "message", + role: "user", + content: "input from items API", + }, + ], + }; + + render( + , + ); + + // Should show input items data, not response.input + expect(screen.getByText("input from items API")).toBeInTheDocument(); + expect(screen.queryByText("fallback input")).not.toBeInTheDocument(); + }); + + test("falls back to response.input when input items is empty", () => { + const emptyInputItems: InputItemListResponse = { + object: "list", + data: [], + }; + + render( + , + ); + + // Should show fallback input data + expect(screen.getByText("fallback input")).toBeInTheDocument(); + }); + + test("shows no input message when no data available", () => { + const responseWithoutInput: OpenAIResponse = { + ...mockResponse, + input: [], + }; + + render( + , + ); + + expect(screen.getByText("No input data available.")).toBeInTheDocument(); + }); + }); + + describe("Input Display Components", () => { + test("renders string content input correctly", () => { + const mockResponse: OpenAIResponse = { + id: "resp_123", + object: "response", + created_at: 1710000000, + model: "test-model", + status: "completed", + output: [], + input: [ + { + type: "message", + role: "user", + content: "Simple string input", + }, + ], + }; + + render(); + + expect(screen.getByText("Simple string input")).toBeInTheDocument(); + expect(screen.getByText("User")).toBeInTheDocument(); + }); + + test("renders array content input correctly", () => { + const mockResponse: OpenAIResponse = { + id: "resp_123", + object: "response", + created_at: 1710000000, + model: "test-model", + status: "completed", + output: [], + input: [ + { + type: "message", + role: "user", + content: [ + { type: "input_text", text: "First part" }, + { type: "output_text", text: "Second part" }, + ], + }, + ], + }; + + render(); + + expect(screen.getByText("First part Second part")).toBeInTheDocument(); + expect(screen.getByText("User")).toBeInTheDocument(); + }); + + test("renders non-message input types correctly", () => { + const mockResponse: OpenAIResponse = { + id: "resp_123", + object: "response", + created_at: 1710000000, + model: "test-model", + status: "completed", + output: [], + input: [ + { + type: "function_call", + content: "function call content", + }, + ], + }; + + render(); + + expect(screen.getByText("function call content")).toBeInTheDocument(); + // Use getAllByText to find the specific "Input" with the type detail + 
const inputElements = screen.getAllByText("Input"); + expect(inputElements.length).toBeGreaterThan(0); + expect(screen.getByText("(function_call)")).toBeInTheDocument(); + }); + + test("handles input with object content", () => { + const mockResponse: OpenAIResponse = { + id: "resp_123", + object: "response", + created_at: 1710000000, + model: "test-model", + status: "completed", + output: [], + input: [ + { + type: "custom_type", + content: JSON.stringify({ key: "value", nested: { data: "test" } }), + }, + ], + }; + + render(); + + // Should show JSON stringified content (without quotes around keys in the rendered output) + expect(screen.getByText(/key.*value/)).toBeInTheDocument(); + // Use getAllByText to find the specific "Input" with the type detail + const inputElements = screen.getAllByText("Input"); + expect(inputElements.length).toBeGreaterThan(0); + expect(screen.getByText("(custom_type)")).toBeInTheDocument(); + }); + + test("renders function call input correctly", () => { + const mockResponse: OpenAIResponse = { + id: "resp_123", + object: "response", + created_at: 1710000000, + model: "test-model", + status: "completed", + output: [], + input: [ + { + type: "function_call", + id: "call_456", + status: "completed", + name: "input_function", + arguments: '{"param": "value"}', + }, + ], + }; + + render(); + + expect( + screen.getByText('input_function({"param": "value"})'), + ).toBeInTheDocument(); + expect(screen.getByText("Function Call")).toBeInTheDocument(); + }); + + test("renders web search call input correctly", () => { + const mockResponse: OpenAIResponse = { + id: "resp_123", + object: "response", + created_at: 1710000000, + model: "test-model", + status: "completed", + output: [], + input: [ + { + type: "web_search_call", + id: "search_789", + status: "completed", + }, + ], + }; + + render(); + + expect( + screen.getByText("web_search_call(status: completed)"), + ).toBeInTheDocument(); + expect(screen.getByText("Function Call")).toBeInTheDocument(); + expect(screen.getByText("(Web Search)")).toBeInTheDocument(); + }); + }); + + describe("Output Display Components", () => { + test("renders message output with string content", () => { + const mockResponse: OpenAIResponse = { + id: "resp_123", + object: "response", + created_at: 1710000000, + model: "test-model", + status: "completed", + output: [ + { + type: "message", + role: "assistant", + content: "Simple string output", + }, + ], + input: [], + }; + + render(); + + expect(screen.getByText("Simple string output")).toBeInTheDocument(); + expect(screen.getByText("Assistant")).toBeInTheDocument(); + }); + + test("renders message output with array content", () => { + const mockResponse: OpenAIResponse = { + id: "resp_123", + object: "response", + created_at: 1710000000, + model: "test-model", + status: "completed", + output: [ + { + type: "message", + role: "assistant", + content: [ + { type: "output_text", text: "First output" }, + { type: "input_text", text: "Second output" }, + ], + }, + ], + input: [], + }; + + render(); + + expect( + screen.getByText("First output Second output"), + ).toBeInTheDocument(); + expect(screen.getByText("Assistant")).toBeInTheDocument(); + }); + + test("renders function call output correctly", () => { + const mockResponse: OpenAIResponse = { + id: "resp_123", + object: "response", + created_at: 1710000000, + model: "test-model", + status: "completed", + output: [ + { + type: "function_call", + id: "call_123", + status: "completed", + name: "search_function", + arguments: '{"query": 
"test"}', + }, + ], + input: [], + }; + + render(); + + expect( + screen.getByText('search_function({"query": "test"})'), + ).toBeInTheDocument(); + expect(screen.getByText("Function Call")).toBeInTheDocument(); + }); + + test("renders function call output without arguments", () => { + const mockResponse: OpenAIResponse = { + id: "resp_123", + object: "response", + created_at: 1710000000, + model: "test-model", + status: "completed", + output: [ + { + type: "function_call", + id: "call_123", + status: "completed", + name: "simple_function", + }, + ], + input: [], + }; + + render(); + + expect(screen.getByText("simple_function({})")).toBeInTheDocument(); + expect(screen.getByText(/Function Call/)).toBeInTheDocument(); + }); + + test("renders web search call output correctly", () => { + const mockResponse: OpenAIResponse = { + id: "resp_123", + object: "response", + created_at: 1710000000, + model: "test-model", + status: "completed", + output: [ + { + type: "web_search_call", + id: "search_123", + status: "completed", + }, + ], + input: [], + }; + + render(); + + expect( + screen.getByText("web_search_call(status: completed)"), + ).toBeInTheDocument(); + expect(screen.getByText(/Function Call/)).toBeInTheDocument(); + expect(screen.getByText("(Web Search)")).toBeInTheDocument(); + }); + + test("renders unknown output types with JSON fallback", () => { + const mockResponse: OpenAIResponse = { + id: "resp_123", + object: "response", + created_at: 1710000000, + model: "test-model", + status: "completed", + output: [ + { + type: "unknown_type", + custom_field: "custom_value", + data: { nested: "object" }, + } as any, + ], + input: [], + }; + + render(); + + // Should show JSON stringified content + expect( + screen.getByText(/custom_field.*custom_value/), + ).toBeInTheDocument(); + expect(screen.getByText("(unknown_type)")).toBeInTheDocument(); + }); + + test("shows no output message when output array is empty", () => { + const mockResponse: OpenAIResponse = { + id: "resp_123", + object: "response", + created_at: 1710000000, + model: "test-model", + status: "completed", + output: [], + input: [], + }; + + render(); + + expect(screen.getByText("No output data available.")).toBeInTheDocument(); + }); + + test("groups function call with its output correctly", () => { + const mockResponse: OpenAIResponse = { + id: "resp_123", + object: "response", + created_at: 1710000000, + model: "test-model", + status: "completed", + output: [ + { + type: "function_call", + id: "call_123", + status: "completed", + name: "get_weather", + arguments: '{"city": "Tokyo"}', + }, + { + type: "message", + role: "assistant", + call_id: "call_123", + content: "sunny and warm", + } as any, // Using any to bypass the type restriction for this test + ], + input: [], + }; + + render(); + + // Should show the function call and message as separate items (not grouped) + expect(screen.getByText("Function Call")).toBeInTheDocument(); + expect( + screen.getByText('get_weather({"city": "Tokyo"})'), + ).toBeInTheDocument(); + expect(screen.getByText("Assistant")).toBeInTheDocument(); + expect(screen.getByText("sunny and warm")).toBeInTheDocument(); + + // Should NOT have the grouped "Arguments" and "Output" labels + expect(screen.queryByText("Arguments")).not.toBeInTheDocument(); + }); + + test("groups function call with function_call_output correctly", () => { + const mockResponse: OpenAIResponse = { + id: "resp_123", + object: "response", + created_at: 1710000000, + model: "test-model", + status: "completed", + output: [ + { + 
type: "function_call", + call_id: "call_123", + status: "completed", + name: "get_weather", + arguments: '{"city": "Tokyo"}', + }, + { + type: "function_call_output", + id: "fc_68364957013081...", + status: "completed", + call_id: "call_123", + output: "sunny and warm", + } as any, // Using any to bypass the type restriction for this test + ], + input: [], + }; + + render(); + + // Should show the function call grouped with its clean output + expect(screen.getByText("Function Call")).toBeInTheDocument(); + expect(screen.getByText("Arguments")).toBeInTheDocument(); + expect( + screen.getByText('get_weather({"city": "Tokyo"})'), + ).toBeInTheDocument(); + // Use getAllByText since there are multiple "Output" elements (card title and output label) + const outputElements = screen.getAllByText("Output"); + expect(outputElements.length).toBeGreaterThan(0); + expect(screen.getByText("sunny and warm")).toBeInTheDocument(); + }); + }); + + describe("Edge Cases and Error Handling", () => { + test("handles missing role in message input", () => { + const mockResponse: OpenAIResponse = { + id: "resp_123", + object: "response", + created_at: 1710000000, + model: "test-model", + status: "completed", + output: [], + input: [ + { + type: "message", + content: "Message without role", + }, + ], + }; + + render(); + + expect(screen.getByText("Message without role")).toBeInTheDocument(); + expect(screen.getByText("Unknown")).toBeInTheDocument(); // Default role + }); + + test("handles missing name in function call output", () => { + const mockResponse: OpenAIResponse = { + id: "resp_123", + object: "response", + created_at: 1710000000, + model: "test-model", + status: "completed", + output: [ + { + type: "function_call", + id: "call_123", + status: "completed", + }, + ], + input: [], + }; + + render(); + + // When name is missing, it falls back to JSON.stringify of the entire output + const functionCallElements = screen.getAllByText(/function_call/); + expect(functionCallElements.length).toBeGreaterThan(0); + expect(screen.getByText(/call_123/)).toBeInTheDocument(); + }); + }); +}); diff --git a/llama_stack/ui/components/responses/responses-detail.tsx b/llama_stack/ui/components/responses/responses-detail.tsx new file mode 100644 index 000000000..c8c447ba4 --- /dev/null +++ b/llama_stack/ui/components/responses/responses-detail.tsx @@ -0,0 +1,171 @@ +"use client"; + +import { OpenAIResponse, InputItemListResponse } from "@/lib/types"; +import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card"; +import { Skeleton } from "@/components/ui/skeleton"; +import { + DetailLoadingView, + DetailErrorView, + DetailNotFoundView, + DetailLayout, + PropertiesCard, + PropertyItem, +} from "@/components/layout/detail-layout"; +import { GroupedItemsDisplay } from "./grouping/grouped-items-display"; + +interface ResponseDetailViewProps { + response: OpenAIResponse | null; + inputItems: InputItemListResponse | null; + isLoading: boolean; + isLoadingInputItems: boolean; + error: Error | null; + inputItemsError: Error | null; + id: string; +} + +export function ResponseDetailView({ + response, + inputItems, + isLoading, + isLoadingInputItems, + error, + inputItemsError, + id, +}: ResponseDetailViewProps) { + const title = "Responses Details"; + + if (error) { + return ; + } + + if (isLoading) { + return ; + } + + if (!response) { + return ; + } + + // Main content cards + const mainContent = ( + <> + + + Input + + + {/* Show loading state for input items */} + {isLoadingInputItems ? ( +
<div className="space-y-2">
+              <Skeleton className="h-4 w-full" />
+              <Skeleton className="h-4 w-3/4" />
+              <Skeleton className="h-4 w-1/2" />
+            </div>
+          ) : inputItemsError ? (
+            <div className="text-red-500 text-sm mb-3">
+              Error loading input items: {inputItemsError.message}
+              <br />
+              <span className="text-gray-500 text-xs">
+                Falling back to response input data.
+              </span>
+            </div>
+          ) : null}
+
+          {/* Display input items if available, otherwise fall back to response.input */}
+          {(() => {
+            const dataToDisplay =
+              inputItems?.data && inputItems.data.length > 0
+                ? inputItems.data
+                : response.input;
+
+            if (dataToDisplay && dataToDisplay.length > 0) {
+              return (
+                <GroupedItemsDisplay items={dataToDisplay} />
+              );
+            } else {
+              return (
+                <p className="text-gray-500 italic text-sm">
+                  No input data available.
+                </p>
+              );
+            }
+          })()}
+        </CardContent>
+      </Card>
+
+      <Card>
+        <CardHeader>
+          <CardTitle>Output</CardTitle>
+        </CardHeader>
+        <CardContent>
+          {response.output?.length > 0 ? (
+            <GroupedItemsDisplay items={response.output} />
+          ) : (
+            <p className="text-gray-500 italic text-sm">
+              No output data available.
+            </p>
+          )}
+        </CardContent>
    + + ); + + // Properties sidebar + const sidebar = ( + + + + + + {response.temperature && ( + + )} + {response.top_p && } + {response.parallel_tool_calls && ( + + )} + {response.previous_response_id && ( + {response.previous_response_id} + } + hasBorder + /> + )} + {response.error && ( + + {response.error.code}: {response.error.message} + + } + className="pt-1 mt-1 border-t border-red-200" + /> + )} + + ); + + return ( + + ); +} diff --git a/llama_stack/ui/components/responses/responses-table.test.tsx b/llama_stack/ui/components/responses/responses-table.test.tsx new file mode 100644 index 000000000..7c45c57d3 --- /dev/null +++ b/llama_stack/ui/components/responses/responses-table.test.tsx @@ -0,0 +1,537 @@ +import React from "react"; +import { render, screen, fireEvent } from "@testing-library/react"; +import "@testing-library/jest-dom"; +import { ResponsesTable } from "./responses-table"; +import { OpenAIResponse } from "@/lib/types"; + +// Mock next/navigation +const mockPush = jest.fn(); +jest.mock("next/navigation", () => ({ + useRouter: () => ({ + push: mockPush, + }), +})); + +// Mock helper functions +jest.mock("@/lib/truncate-text"); + +// Import the mocked functions +import { truncateText as originalTruncateText } from "@/lib/truncate-text"; + +// Cast to jest.Mock for typings +const truncateText = originalTruncateText as jest.Mock; + +describe("ResponsesTable", () => { + const defaultProps = { + data: [] as OpenAIResponse[], + isLoading: false, + error: null, + }; + + beforeEach(() => { + // Reset all mocks before each test + mockPush.mockClear(); + truncateText.mockClear(); + + // Default pass-through implementation + truncateText.mockImplementation((text: string | undefined) => text); + }); + + test("renders without crashing with default props", () => { + render(); + expect(screen.getByText("No responses found.")).toBeInTheDocument(); + }); + + test("click on a row navigates to the correct URL", () => { + const mockResponse: OpenAIResponse = { + id: "resp_123", + object: "response", + created_at: Math.floor(Date.now() / 1000), + model: "llama-test-model", + status: "completed", + output: [ + { + type: "message", + role: "assistant", + content: "Test output", + }, + ], + input: [ + { + type: "message", + role: "user", + content: "Test input", + }, + ], + }; + + render(); + + const row = screen.getByText("Test input").closest("tr"); + if (row) { + fireEvent.click(row); + expect(mockPush).toHaveBeenCalledWith("/logs/responses/resp_123"); + } else { + throw new Error('Row with "Test input" not found for router mock test.'); + } + }); + + describe("Loading State", () => { + test("renders skeleton UI when isLoading is true", () => { + const { container } = render( + , + ); + + // Check for skeleton in the table caption + const tableCaption = container.querySelector("caption"); + expect(tableCaption).toBeInTheDocument(); + if (tableCaption) { + const captionSkeleton = tableCaption.querySelector( + '[data-slot="skeleton"]', + ); + expect(captionSkeleton).toBeInTheDocument(); + } + + // Check for skeletons in the table body cells + const tableBody = container.querySelector("tbody"); + expect(tableBody).toBeInTheDocument(); + if (tableBody) { + const bodySkeletons = tableBody.querySelectorAll( + '[data-slot="skeleton"]', + ); + expect(bodySkeletons.length).toBeGreaterThan(0); + } + }); + }); + + describe("Error State", () => { + test("renders error message when error prop is provided", () => { + const errorMessage = "Network Error"; + render( + , + ); + expect( + 
screen.getByText(`Error fetching data: ${errorMessage}`), + ).toBeInTheDocument(); + }); + + test("renders default error message when error.message is not available", () => { + render( + , + ); + expect( + screen.getByText("Error fetching data: An unknown error occurred"), + ).toBeInTheDocument(); + }); + + test("renders default error message when error prop is an object without message", () => { + render(); + expect( + screen.getByText("Error fetching data: An unknown error occurred"), + ).toBeInTheDocument(); + }); + }); + + describe("Empty State", () => { + test('renders "No responses found." and no table when data array is empty', () => { + render(); + expect(screen.getByText("No responses found.")).toBeInTheDocument(); + + // Ensure that the table structure is NOT rendered in the empty state + const table = screen.queryByRole("table"); + expect(table).not.toBeInTheDocument(); + }); + }); + + describe("Data Rendering", () => { + test("renders table caption, headers, and response data correctly", () => { + const mockResponses = [ + { + id: "resp_1", + object: "response" as const, + created_at: 1710000000, + model: "llama-test-model", + status: "completed", + output: [ + { + type: "message" as const, + role: "assistant" as const, + content: "Test output", + }, + ], + input: [ + { + type: "message", + role: "user", + content: "Test input", + }, + ], + }, + { + id: "resp_2", + object: "response" as const, + created_at: 1710001000, + model: "llama-another-model", + status: "completed", + output: [ + { + type: "message" as const, + role: "assistant" as const, + content: "Another output", + }, + ], + input: [ + { + type: "message", + role: "user", + content: "Another input", + }, + ], + }, + ]; + + render( + , + ); + + // Table caption + expect( + screen.getByText("A list of your recent responses."), + ).toBeInTheDocument(); + + // Table headers + expect(screen.getByText("Input")).toBeInTheDocument(); + expect(screen.getByText("Output")).toBeInTheDocument(); + expect(screen.getByText("Model")).toBeInTheDocument(); + expect(screen.getByText("Created")).toBeInTheDocument(); + + // Data rows + expect(screen.getByText("Test input")).toBeInTheDocument(); + expect(screen.getByText("Test output")).toBeInTheDocument(); + expect(screen.getByText("llama-test-model")).toBeInTheDocument(); + expect( + screen.getByText(new Date(1710000000 * 1000).toLocaleString()), + ).toBeInTheDocument(); + + expect(screen.getByText("Another input")).toBeInTheDocument(); + expect(screen.getByText("Another output")).toBeInTheDocument(); + expect(screen.getByText("llama-another-model")).toBeInTheDocument(); + expect( + screen.getByText(new Date(1710001000 * 1000).toLocaleString()), + ).toBeInTheDocument(); + }); + }); + + describe("Input Text Extraction", () => { + test("extracts text from string content", () => { + const mockResponse: OpenAIResponse = { + id: "resp_string", + object: "response", + created_at: 1710000000, + model: "test-model", + status: "completed", + output: [{ type: "message", role: "assistant", content: "output" }], + input: [ + { + type: "message", + role: "user", + content: "Simple string input", + }, + ], + }; + + render( + , + ); + expect(screen.getByText("Simple string input")).toBeInTheDocument(); + }); + + test("extracts text from array content with input_text type", () => { + const mockResponse: OpenAIResponse = { + id: "resp_array", + object: "response", + created_at: 1710000000, + model: "test-model", + status: "completed", + output: [{ type: "message", role: "assistant", content: "output" 
}], + input: [ + { + type: "message", + role: "user", + content: [ + { type: "input_text", text: "Array input text" }, + { type: "input_text", text: "Should not be used" }, + ], + }, + ], + }; + + render( + , + ); + expect(screen.getByText("Array input text")).toBeInTheDocument(); + }); + + test("returns empty string when no message input found", () => { + const mockResponse: OpenAIResponse = { + id: "resp_no_input", + object: "response", + created_at: 1710000000, + model: "test-model", + status: "completed", + output: [{ type: "message", role: "assistant", content: "output" }], + input: [ + { + type: "other_type", + content: "Not a message", + }, + ], + }; + + const { container } = render( + , + ); + + // Find the input cell (first cell in the data row) and verify it's empty + const inputCell = container.querySelector("tbody tr td:first-child"); + expect(inputCell).toBeInTheDocument(); + expect(inputCell).toHaveTextContent(""); + }); + }); + + describe("Output Text Extraction", () => { + test("extracts text from string message content", () => { + const mockResponse: OpenAIResponse = { + id: "resp_string_output", + object: "response", + created_at: 1710000000, + model: "test-model", + status: "completed", + output: [ + { + type: "message", + role: "assistant", + content: "Simple string output", + }, + ], + input: [{ type: "message", content: "input" }], + }; + + render( + , + ); + expect(screen.getByText("Simple string output")).toBeInTheDocument(); + }); + + test("extracts text from array message content with output_text type", () => { + const mockResponse: OpenAIResponse = { + id: "resp_array_output", + object: "response", + created_at: 1710000000, + model: "test-model", + status: "completed", + output: [ + { + type: "message", + role: "assistant", + content: [ + { type: "output_text", text: "Array output text" }, + { type: "output_text", text: "Should not be used" }, + ], + }, + ], + input: [{ type: "message", content: "input" }], + }; + + render( + , + ); + expect(screen.getByText("Array output text")).toBeInTheDocument(); + }); + + test("formats function call output", () => { + const mockResponse: OpenAIResponse = { + id: "resp_function_call", + object: "response", + created_at: 1710000000, + model: "test-model", + status: "completed", + output: [ + { + type: "function_call", + id: "call_123", + status: "completed", + name: "search_function", + arguments: '{"query": "test"}', + }, + ], + input: [{ type: "message", content: "input" }], + }; + + render( + , + ); + expect( + screen.getByText('search_function({"query": "test"})'), + ).toBeInTheDocument(); + }); + + test("formats function call output without arguments", () => { + const mockResponse: OpenAIResponse = { + id: "resp_function_no_args", + object: "response", + created_at: 1710000000, + model: "test-model", + status: "completed", + output: [ + { + type: "function_call", + id: "call_123", + status: "completed", + name: "simple_function", + }, + ], + input: [{ type: "message", content: "input" }], + }; + + render( + , + ); + expect(screen.getByText("simple_function({})")).toBeInTheDocument(); + }); + + test("formats web search call output", () => { + const mockResponse: OpenAIResponse = { + id: "resp_web_search", + object: "response", + created_at: 1710000000, + model: "test-model", + status: "completed", + output: [ + { + type: "web_search_call", + id: "search_123", + status: "completed", + }, + ], + input: [{ type: "message", content: "input" }], + }; + + render( + , + ); + expect( + screen.getByText("web_search_call(status: 
completed)"), + ).toBeInTheDocument(); + }); + + test("falls back to JSON.stringify for unknown tool call types", () => { + const mockResponse: OpenAIResponse = { + id: "resp_unknown_tool", + object: "response", + created_at: 1710000000, + model: "test-model", + status: "completed", + output: [ + { + type: "unknown_call", + id: "unknown_123", + status: "completed", + custom_field: "custom_value", + } as any, + ], + input: [{ type: "message", content: "input" }], + }; + + render( + , + ); + // Should contain the JSON stringified version + expect(screen.getByText(/unknown_call/)).toBeInTheDocument(); + }); + + test("falls back to JSON.stringify for entire output when no message or tool call found", () => { + const mockResponse: OpenAIResponse = { + id: "resp_fallback", + object: "response", + created_at: 1710000000, + model: "test-model", + status: "completed", + output: [ + { + type: "unknown_type", + data: "some data", + } as any, + ], + input: [{ type: "message", content: "input" }], + }; + + render( + , + ); + // Should contain the JSON stringified version of the output array + expect(screen.getByText(/unknown_type/)).toBeInTheDocument(); + }); + }); + + describe("Text Truncation", () => { + test("truncates long input and output text", () => { + // Specific mock implementation for this test + truncateText.mockImplementation( + (text: string | undefined, maxLength?: number) => { + const defaultTestMaxLength = 10; + const effectiveMaxLength = maxLength ?? defaultTestMaxLength; + return typeof text === "string" && text.length > effectiveMaxLength + ? text.slice(0, effectiveMaxLength) + "..." + : text; + }, + ); + + const longInput = + "This is a very long input message that should be truncated."; + const longOutput = + "This is a very long output message that should also be truncated."; + + const mockResponse: OpenAIResponse = { + id: "resp_trunc", + object: "response", + created_at: 1710002000, + model: "llama-trunc-model", + status: "completed", + output: [ + { + type: "message", + role: "assistant", + content: longOutput, + }, + ], + input: [ + { + type: "message", + role: "user", + content: longInput, + }, + ], + }; + + render( + , + ); + + // The truncated text should be present for both input and output + const truncatedTexts = screen.getAllByText( + longInput.slice(0, 10) + "...", + ); + expect(truncatedTexts.length).toBe(2); // one for input, one for output + truncatedTexts.forEach((textElement) => + expect(textElement).toBeInTheDocument(), + ); + }); + }); +}); diff --git a/llama_stack/ui/components/responses/responses-table.tsx b/llama_stack/ui/components/responses/responses-table.tsx new file mode 100644 index 000000000..352450d18 --- /dev/null +++ b/llama_stack/ui/components/responses/responses-table.tsx @@ -0,0 +1,117 @@ +"use client"; + +import { + OpenAIResponse, + ResponseInput, + ResponseInputMessageContent, +} from "@/lib/types"; +import { LogsTable, LogTableRow } from "@/components/logs/logs-table"; +import { + isMessageInput, + isMessageItem, + isFunctionCallItem, + isWebSearchCallItem, + MessageItem, + FunctionCallItem, + WebSearchCallItem, +} from "./utils/item-types"; + +interface ResponsesTableProps { + data: OpenAIResponse[]; + isLoading: boolean; + error: Error | null; +} + +function getInputText(response: OpenAIResponse): string { + const firstInput = response.input.find(isMessageInput); + if (firstInput) { + return extractContentFromItem(firstInput); + } + return ""; +} + +function getOutputText(response: OpenAIResponse): string { + const firstMessage = 
response.output.find((item) => + isMessageItem(item as any), + ); + if (firstMessage) { + const content = extractContentFromItem(firstMessage as MessageItem); + if (content) { + return content; + } + } + + const functionCall = response.output.find((item) => + isFunctionCallItem(item as any), + ); + if (functionCall) { + return formatFunctionCall(functionCall as FunctionCallItem); + } + + const webSearchCall = response.output.find((item) => + isWebSearchCallItem(item as any), + ); + if (webSearchCall) { + return formatWebSearchCall(webSearchCall as WebSearchCallItem); + } + + return JSON.stringify(response.output); +} + +function extractContentFromItem(item: { + content?: string | ResponseInputMessageContent[]; +}): string { + if (!item.content) { + return ""; + } + + if (typeof item.content === "string") { + return item.content; + } else if (Array.isArray(item.content)) { + const textContent = item.content.find( + (c: ResponseInputMessageContent) => + c.type === "input_text" || c.type === "output_text", + ); + return textContent?.text || ""; + } + return ""; +} + +function formatFunctionCall(functionCall: FunctionCallItem): string { + const args = functionCall.arguments || "{}"; + const name = functionCall.name || "unknown"; + return `${name}(${args})`; +} + +function formatWebSearchCall(webSearchCall: WebSearchCallItem): string { + return `web_search_call(status: ${webSearchCall.status})`; +} + +function formatResponseToRow(response: OpenAIResponse): LogTableRow { + return { + id: response.id, + input: getInputText(response), + output: getOutputText(response), + model: response.model, + createdTime: new Date(response.created_at * 1000).toLocaleString(), + detailPath: `/logs/responses/${response.id}`, + }; +} + +export function ResponsesTable({ + data, + isLoading, + error, +}: ResponsesTableProps) { + const formattedData = data.map(formatResponseToRow); + + return ( + + ); +} diff --git a/llama_stack/ui/components/responses/utils/item-types.ts b/llama_stack/ui/components/responses/utils/item-types.ts new file mode 100644 index 000000000..2bde49119 --- /dev/null +++ b/llama_stack/ui/components/responses/utils/item-types.ts @@ -0,0 +1,61 @@ +/** + * Type guards for different item types in responses + */ + +import type { + ResponseInput, + ResponseOutput, + ResponseMessage, + ResponseToolCall, +} from "@/lib/types"; + +export interface BaseItem { + type: string; + [key: string]: unknown; +} + +export type MessageItem = ResponseMessage; +export type FunctionCallItem = ResponseToolCall & { type: "function_call" }; +export type WebSearchCallItem = ResponseToolCall & { type: "web_search_call" }; +export type FunctionCallOutputItem = BaseItem & { + type: "function_call_output"; + call_id: string; + output?: string | object; +}; + +export type AnyResponseItem = + | ResponseInput + | ResponseOutput + | FunctionCallOutputItem; + +export function isMessageInput( + item: ResponseInput, +): item is ResponseInput & { type: "message" } { + return item.type === "message"; +} + +export function isMessageItem(item: AnyResponseItem): item is MessageItem { + return item.type === "message" && "content" in item; +} + +export function isFunctionCallItem( + item: AnyResponseItem, +): item is FunctionCallItem { + return item.type === "function_call" && "name" in item; +} + +export function isWebSearchCallItem( + item: AnyResponseItem, +): item is WebSearchCallItem { + return item.type === "web_search_call"; +} + +export function isFunctionCallOutputItem( + item: AnyResponseItem, +): item is FunctionCallOutputItem 
{ + return ( + item.type === "function_call_output" && + "call_id" in item && + typeof (item as any).call_id === "string" + ); +} diff --git a/llama_stack/ui/components/ui/breadcrumb.tsx b/llama_stack/ui/components/ui/breadcrumb.tsx new file mode 100644 index 000000000..f63ae19af --- /dev/null +++ b/llama_stack/ui/components/ui/breadcrumb.tsx @@ -0,0 +1,109 @@ +import * as React from "react"; +import { Slot } from "@radix-ui/react-slot"; +import { ChevronRight, MoreHorizontal } from "lucide-react"; + +import { cn } from "@/lib/utils"; + +function Breadcrumb({ ...props }: React.ComponentProps<"nav">) { + return