Resolved merge conflicts

Author: Chantal D Gama Rose, 2025-03-14 14:09:44 -07:00
Commit: 3b3195d8e6
69 changed files with 7693 additions and 467 deletions

.github/workflows/integration-tests.yml (new file, +80 lines)

@ -0,0 +1,80 @@
name: Integration tests
on:
pull_request:
push:
branches: [main]
jobs:
ollama:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Install uv
uses: astral-sh/setup-uv@v5
with:
python-version: "3.10"
- name: Install Ollama
run: |
curl -fsSL https://ollama.com/install.sh | sh
- name: Pull Ollama image
run: |
ollama pull llama3.2:3b-instruct-fp16
- name: Start Ollama in background
run: |
nohup ollama run llama3.2:3b-instruct-fp16 > ollama.log 2>&1 &
- name: Set Up Environment and Install Dependencies
run: |
uv sync --extra dev --extra test
uv pip install ollama faiss-cpu
uv pip install -e .
- name: Wait for Ollama to start
run: |
echo "Waiting for Ollama..."
for i in {1..30}; do
if curl -s http://localhost:11434 | grep -q "Ollama is running"; then
echo "Ollama is running!"
exit 0
fi
sleep 1
done
echo "Ollama failed to start"
ollama ps
ollama.log
exit 1
- name: Start Llama Stack server in background
env:
INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
run: |
source .venv/bin/activate
# TODO: use "llama stack run"
nohup uv run python -m llama_stack.distribution.server.server --yaml-config ./llama_stack/templates/ollama/run.yaml > server.log 2>&1 &
- name: Wait for Llama Stack server to be ready
run: |
echo "Waiting for Llama Stack server..."
for i in {1..30}; do
if curl -s http://localhost:8321/v1/health | grep -q "OK"; then
echo " Llama Stack server is up!"
exit 0
fi
sleep 1
done
echo " Llama Stack server failed to start"
cat server.log
exit 1
- name: Run Inference Integration Tests
env:
INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
run: |
uv run pytest -v tests/integration/inference --stack-config=ollama --text-model="meta-llama/Llama-3.2-3B-Instruct" --embedding-model=all-MiniLM-L6-v2

.github/workflows/providers-build.yml (new file, +76 lines)

@ -0,0 +1,76 @@
name: Test Llama Stack Build
on:
push:
branches:
- main
paths:
- 'llama_stack/cli/stack/build.py'
- 'llama_stack/cli/stack/_build.py'
- 'llama_stack/distribution/build.*'
- 'llama_stack/distribution/*.sh'
- '.github/workflows/providers-build.yml'
pull_request:
paths:
- 'llama_stack/cli/stack/build.py'
- 'llama_stack/cli/stack/_build.py'
- 'llama_stack/distribution/build.*'
- 'llama_stack/distribution/*.sh'
- '.github/workflows/providers-build.yml'
jobs:
generate-matrix:
runs-on: ubuntu-latest
outputs:
templates: ${{ steps.set-matrix.outputs.templates }}
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Generate Template List
id: set-matrix
run: |
templates=$(ls llama_stack/templates/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
echo "templates=$templates" >> "$GITHUB_OUTPUT"
build:
needs: generate-matrix
runs-on: ubuntu-latest
strategy:
matrix:
template: ${{ fromJson(needs.generate-matrix.outputs.templates) }}
image-type: [venv, container]
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.10'
- name: Install uv
uses: astral-sh/setup-uv@v5
with:
python-version: "3.10"
- name: Install LlamaStack
run: |
uv venv
source .venv/bin/activate
uv pip install -e .
- name: Print build dependencies
run: |
uv run llama stack build --template ${{ matrix.template }} --image-type ${{ matrix.image-type }} --image-name test --print-deps-only
- name: Run Llama Stack Build
run: |
uv run llama stack build --template ${{ matrix.template }} --image-type ${{ matrix.image-type }} --image-name test
- name: Print dependencies in the image
if: matrix.image-type == 'venv'
run: |
source test/bin/activate
uv pip list

.github/workflows/stale_bot.yml (new file, +45 lines)

@ -0,0 +1,45 @@
name: Close stale issues and PRs
on:
schedule:
- cron: '0 0 * * *' # every day at midnight
env:
LC_ALL: en_US.UTF-8
defaults:
run:
shell: bash
permissions:
contents: read
jobs:
stale:
permissions:
issues: write
pull-requests: write
runs-on: ubuntu-latest
steps:
- name: Stale Action
uses: actions/stale@v9
with:
stale-issue-label: 'stale'
stale-issue-message: >
This issue has been automatically marked as stale because it has not had activity within 60 days.
It will be automatically closed if no further activity occurs within 30 days.
close-issue-message: >
This issue has been automatically closed due to inactivity.
Please feel free to reopen if you feel it is still relevant!
days-before-issue-stale: 60
days-before-issue-close: 30
stale-pr-label: 'stale'
stale-pr-message: >
This pull request has been automatically marked as stale because it has not had activity within 60 days.
It will be automatically closed if no further activity occurs within 30 days.
close-pr-message: >
This pull request has been automatically closed due to inactivity.
Please feel free to reopen if you intend to continue working on it!
days-before-pr-stale: 60
days-before-pr-close: 30
operations-per-run: 300


@ -33,7 +33,7 @@ jobs:
- name: Run unit tests
run: |
-uv run --python ${{ matrix.python }} --with-editable . --with-editable ".[dev]" --with-editable ".[unit]" pytest --cov=llama_stack -s -v tests/unit/ --junitxml=pytest-report-${{ matrix.python }}.xml --cov-report=html:htmlcov-${{ matrix.python }}
+PYTHON_VERSION=${{ matrix.python }} ./scripts/unit-tests.sh --cov=llama_stack --junitxml=pytest-report-${{ matrix.python }}.xml --cov-report=html:htmlcov-${{ matrix.python }}
- name: Upload test results
if: always()


@ -8,6 +8,7 @@ repos:
rev: v5.0.0 # Latest stable version
hooks:
- id: check-merge-conflict
+args: ['--assume-in-merge']
- id: trailing-whitespace
exclude: '\.py$' # Exclude Python files as Ruff already handles them
- id: check-added-large-files
@ -82,6 +83,17 @@ repos:
require_serial: true
files: ^llama_stack/templates/.*$|^llama_stack/providers/.*/inference/.*/models\.py$
+- repo: local
+hooks:
+- id: openapi-codegen
+name: API Spec Codegen
+additional_dependencies:
+- uv==0.6.2
+entry: sh -c 'uv run --with ".[dev]" ./docs/openapi_generator/run_openapi_generator.sh > /dev/null 2>&1'
+language: python
+pass_filenames: false
+require_serial: true
ci:
autofix_commit_msg: 🎨 [pre-commit.ci] Auto format from pre-commit.com hooks
autoupdate_commit_msg: ⬆ [pre-commit.ci] pre-commit autoupdate


@ -108,6 +108,22 @@ uv run pre-commit run --all-files
> [!CAUTION]
> Before pushing your changes, make sure that the pre-commit hooks have passed successfully.
+## Running unit tests
+You can run the unit tests with:
+```bash
+source .venv/bin/activate
+./scripts/unit-tests.sh
+```
+If you'd like to run against a non-default version of Python (currently 3.10), pass the `PYTHON_VERSION` variable as follows:
+```bash
+source .venv/bin/activate
+PYTHON_VERSION=3.13 ./scripts/unit-tests.sh
+```
## Adding a new dependency to the project
To add a new dependency to the project, you can use the `uv` command. For example, to add `foo` to the project, you can run:


@ -51,6 +51,10 @@ Here is a list of the various API providers and available distributions that can
| PG Vector | Single Node | | | ✅ | | |
| PyTorch ExecuTorch | On-device iOS | ✅ | ✅ | | | |
| vLLM | Hosted and Single Node | | ✅ | | | |
+| OpenAI | Hosted | | ✅ | | | |
+| Anthropic | Hosted | | ✅ | | | |
+| Gemini | Hosted | | ✅ | | | |
### Distributions


@ -30,6 +30,7 @@
"sentencepiece", "sentencepiece",
"tqdm", "tqdm",
"transformers", "transformers",
"tree_sitter",
"uvicorn" "uvicorn"
], ],
"cerebras": [ "cerebras": [
@ -62,6 +63,7 @@
"sentencepiece", "sentencepiece",
"tqdm", "tqdm",
"transformers", "transformers",
"tree_sitter",
"uvicorn", "uvicorn",
"sentence-transformers --no-deps", "sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu" "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
@ -97,6 +99,7 @@
"sqlite-vec", "sqlite-vec",
"tqdm", "tqdm",
"transformers", "transformers",
"tree_sitter",
"uvicorn", "uvicorn",
"sentence-transformers --no-deps", "sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu" "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
@ -132,6 +135,7 @@
"sentencepiece", "sentencepiece",
"tqdm", "tqdm",
"transformers", "transformers",
"tree_sitter",
"uvicorn", "uvicorn",
"sentence-transformers --no-deps", "sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu" "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
@ -168,6 +172,7 @@
"sqlite-vec", "sqlite-vec",
"tqdm", "tqdm",
"transformers", "transformers",
"tree_sitter",
"uvicorn", "uvicorn",
"sentence-transformers --no-deps", "sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu" "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
@ -203,6 +208,7 @@
"sentencepiece", "sentencepiece",
"tqdm", "tqdm",
"transformers", "transformers",
"tree_sitter",
"uvicorn", "uvicorn",
"sentence-transformers --no-deps", "sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu" "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
@ -236,6 +242,7 @@
"sentencepiece", "sentencepiece",
"tqdm", "tqdm",
"transformers", "transformers",
"tree_sitter",
"uvicorn" "uvicorn"
], ],
"hf-endpoint": [ "hf-endpoint": [
@ -270,6 +277,7 @@
"sentencepiece", "sentencepiece",
"tqdm", "tqdm",
"transformers", "transformers",
"tree_sitter",
"uvicorn" "uvicorn"
], ],
"hf-serverless": [ "hf-serverless": [
@ -304,6 +312,7 @@
"sentencepiece", "sentencepiece",
"tqdm", "tqdm",
"transformers", "transformers",
"tree_sitter",
"uvicorn", "uvicorn",
"sentence-transformers --no-deps", "sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu" "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
@ -344,6 +353,7 @@
"torchvision", "torchvision",
"tqdm", "tqdm",
"transformers", "transformers",
"tree_sitter",
"uvicorn", "uvicorn",
"zmq" "zmq"
], ],
@ -385,6 +395,7 @@
"torchvision", "torchvision",
"tqdm", "tqdm",
"transformers", "transformers",
"tree_sitter",
"uvicorn", "uvicorn",
"zmq" "zmq"
], ],
@ -414,6 +425,7 @@
"sentencepiece", "sentencepiece",
"tqdm", "tqdm",
"transformers", "transformers",
"tree_sitter",
"uvicorn" "uvicorn"
], ],
"ollama": [ "ollama": [
@ -448,6 +460,7 @@
"sentencepiece", "sentencepiece",
"tqdm", "tqdm",
"transformers", "transformers",
"tree_sitter",
"uvicorn" "uvicorn"
], ],
"open-benchmark": [ "open-benchmark": [
@ -482,8 +495,44 @@
"together", "together",
"tqdm", "tqdm",
"transformers", "transformers",
"tree_sitter",
"uvicorn" "uvicorn"
], ],
"passthrough": [
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
"sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
],
"remote-vllm": [ "remote-vllm": [
"aiosqlite", "aiosqlite",
"autoevals", "autoevals",
@ -514,6 +563,7 @@
"sentencepiece", "sentencepiece",
"tqdm", "tqdm",
"transformers", "transformers",
"tree_sitter",
"uvicorn", "uvicorn",
"sentence-transformers --no-deps", "sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu" "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
@ -579,6 +629,7 @@
"sentencepiece", "sentencepiece",
"tqdm", "tqdm",
"transformers", "transformers",
"tree_sitter",
"uvicorn", "uvicorn",
"sentence-transformers --no-deps", "sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu" "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
@ -614,6 +665,7 @@
"together", "together",
"tqdm", "tqdm",
"transformers", "transformers",
"tree_sitter",
"uvicorn", "uvicorn",
"sentence-transformers --no-deps", "sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu" "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
@ -648,6 +700,7 @@
"sentencepiece", "sentencepiece",
"tqdm", "tqdm",
"transformers", "transformers",
"tree_sitter",
"uvicorn", "uvicorn",
"vllm", "vllm",
"sentence-transformers --no-deps", "sentence-transformers --no-deps",


@ -2151,6 +2151,48 @@
}
}
},
+"/v1/providers/{provider_id}": {
+"get": {
+"responses": {
+"200": {
+"description": "OK",
+"content": {
+"application/json": {
+"schema": {
+"$ref": "#/components/schemas/ProviderInfo"
+}
+}
+}
+},
+"400": {
+"$ref": "#/components/responses/BadRequest400"
+},
+"429": {
+"$ref": "#/components/responses/TooManyRequests429"
+},
+"500": {
+"$ref": "#/components/responses/InternalServerError500"
+},
+"default": {
+"$ref": "#/components/responses/DefaultError"
+}
+},
+"tags": [
+"Providers"
+],
+"description": "",
+"parameters": [
+{
+"name": "provider_id",
+"in": "path",
+"required": true,
+"schema": {
+"type": "string"
+}
+}
+]
+}
+},
"/v1/tool-runtime/invoke": {
"post": {
"responses": {
@ -2642,7 +2684,7 @@
}
}
},
-"/v1/inspect/providers": {
+"/v1/providers": {
"get": {
"responses": {
"200": {
@ -7912,6 +7954,53 @@
],
"title": "InsertChunksRequest"
},
+"ProviderInfo": {
+"type": "object",
+"properties": {
+"api": {
+"type": "string"
+},
+"provider_id": {
+"type": "string"
+},
+"provider_type": {
+"type": "string"
+},
+"config": {
+"type": "object",
+"additionalProperties": {
+"oneOf": [
+{
+"type": "null"
+},
+{
+"type": "boolean"
+},
+{
+"type": "number"
+},
+{
+"type": "string"
+},
+{
+"type": "array"
+},
+{
+"type": "object"
+}
+]
+}
+}
+},
+"additionalProperties": false,
+"required": [
+"api",
+"provider_id",
+"provider_type",
+"config"
+],
+"title": "ProviderInfo"
+},
"InvokeToolRequest": {
"type": "object",
"properties": {
@ -8124,27 +8213,6 @@
],
"title": "ListModelsResponse"
},
-"ProviderInfo": {
-"type": "object",
-"properties": {
-"api": {
-"type": "string"
-},
-"provider_id": {
-"type": "string"
-},
-"provider_type": {
-"type": "string"
-}
-},
-"additionalProperties": false,
-"required": [
-"api",
-"provider_id",
-"provider_type"
-],
-"title": "ProviderInfo"
-},
"ListProvidersResponse": {
"type": "object",
"properties": {
@ -10145,6 +10213,10 @@
{
"name": "PostTraining (Coming Soon)"
},
+{
+"name": "Providers",
+"x-displayName": "Providers API for inspecting, listing, and modifying providers and their configurations."
+},
{
"name": "Safety"
},
@ -10191,6 +10263,7 @@
"Inspect",
"Models",
"PostTraining (Coming Soon)",
+"Providers",
"Safety",
"Scoring",
"ScoringFunctions",


@ -1444,6 +1444,34 @@ paths:
schema:
$ref: '#/components/schemas/InsertChunksRequest'
required: true
+/v1/providers/{provider_id}:
+get:
+responses:
+'200':
+description: OK
+content:
+application/json:
+schema:
+$ref: '#/components/schemas/ProviderInfo'
+'400':
+$ref: '#/components/responses/BadRequest400'
+'429':
+$ref: >-
+#/components/responses/TooManyRequests429
+'500':
+$ref: >-
+#/components/responses/InternalServerError500
+default:
+$ref: '#/components/responses/DefaultError'
+tags:
+- Providers
+description: ''
+parameters:
+- name: provider_id
+in: path
+required: true
+schema:
+type: string
/v1/tool-runtime/invoke:
post:
responses:
@ -1782,7 +1810,7 @@ paths:
schema:
$ref: '#/components/schemas/RegisterModelRequest'
required: true
-/v1/inspect/providers:
+/v1/providers:
get:
responses:
'200':
@ -5409,6 +5437,32 @@ components:
- vector_db_id
- chunks
title: InsertChunksRequest
+ProviderInfo:
+type: object
+properties:
+api:
+type: string
+provider_id:
+type: string
+provider_type:
+type: string
+config:
+type: object
+additionalProperties:
+oneOf:
+- type: 'null'
+- type: boolean
+- type: number
+- type: string
+- type: array
+- type: object
+additionalProperties: false
+required:
+- api
+- provider_id
+- provider_type
+- config
+title: ProviderInfo
InvokeToolRequest:
type: object
properties:
@ -5544,21 +5598,6 @@ components:
required:
- data
title: ListModelsResponse
-ProviderInfo:
-type: object
-properties:
-api:
-type: string
-provider_id:
-type: string
-provider_type:
-type: string
-additionalProperties: false
-required:
-- api
-- provider_id
-- provider_type
-title: ProviderInfo
ListProvidersResponse:
type: object
properties:
@ -6832,6 +6871,9 @@ tags:
- name: Inspect
- name: Models
- name: PostTraining (Coming Soon)
+- name: Providers
+x-displayName: >-
+Providers API for inspecting, listing, and modifying providers and their configurations.
- name: Safety
- name: Scoring
- name: ScoringFunctions
@ -6856,6 +6898,7 @@ x-tagGroups:
- Inspect
- Models
- PostTraining (Coming Soon)
+- Providers
- Safety
- Scoring
- ScoringFunctions


@ -0,0 +1,42 @@
---
orphan: true
---
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
# Passthrough Distribution
```{toctree}
:maxdepth: 2
:hidden:
self
```
The `llamastack/distribution-passthrough` distribution consists of the following provider configurations.
| API | Provider(s) |
|-----|-------------|
| agents | `inline::meta-reference` |
| datasetio | `remote::huggingface`, `inline::localfs` |
| eval | `inline::meta-reference` |
| inference | `remote::passthrough`, `inline::sentence-transformers` |
| safety | `inline::llama-guard` |
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
| telemetry | `inline::meta-reference` |
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `remote::wolfram-alpha`, `inline::code-interpreter`, `inline::rag-runtime`, `remote::model-context-protocol` |
| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
### Environment Variables
The following environment variables can be configured:
- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
- `PASSTHROUGH_API_KEY`: Passthrough API Key (default: ``)
- `PASSTHROUGH_URL`: Passthrough URL (default: ``)
### Models
The following models are available by default:
- `llama3.1-8b-instruct`
- `llama3.2-11b-vision-instruct`


@ -61,6 +61,10 @@ A number of "adapters" are available for some popular Inference and Vector Store
| Groq | Hosted |
| SambaNova | Hosted |
| PyTorch ExecuTorch | On-device iOS, Android |
+| OpenAI | Hosted |
+| Anthropic | Hosted |
+| Gemini | Hosted |
**Vector IO API**
| **Provider** | **Environments** |


@ -14,6 +14,7 @@ from llama_stack.schema_utils import json_schema_type
@json_schema_type
class Api(Enum):
+providers = "providers"
inference = "inference"
safety = "safety"
agents = "agents"


@ -11,13 +11,6 @@ from pydantic import BaseModel
from llama_stack.schema_utils import json_schema_type, webmethod
-@json_schema_type
-class ProviderInfo(BaseModel):
-api: str
-provider_id: str
-provider_type: str
@json_schema_type
class RouteInfo(BaseModel):
route: str
@ -32,14 +25,21 @@ class HealthInfo(BaseModel):
@json_schema_type
-class VersionInfo(BaseModel):
-version: str
+class ProviderInfo(BaseModel):
+api: str
+provider_id: str
+provider_type: str
class ListProvidersResponse(BaseModel):
data: List[ProviderInfo]
+@json_schema_type
+class VersionInfo(BaseModel):
+version: str
class ListRoutesResponse(BaseModel):
data: List[RouteInfo]


@ -0,0 +1,7 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from .providers import * # noqa: F401 F403


@ -0,0 +1,36 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Any, Dict, List, Protocol, runtime_checkable
from pydantic import BaseModel
from llama_stack.schema_utils import json_schema_type, webmethod
@json_schema_type
class ProviderInfo(BaseModel):
api: str
provider_id: str
provider_type: str
config: Dict[str, Any]
class ListProvidersResponse(BaseModel):
data: List[ProviderInfo]
@runtime_checkable
class Providers(Protocol):
"""
Providers API for inspecting, listing, and modifying providers and their configurations.
"""
@webmethod(route="/providers", method="GET")
async def list_providers(self) -> ListProvidersResponse: ...
@webmethod(route="/providers/{provider_id}", method="GET")
async def inspect_provider(self, provider_id: str) -> ProviderInfo: ...
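For context, a minimal sketch of how the two new routes could be exercised against a running stack server. This is not part of the commit; the base URL reuses the default port 8321 seen elsewhere in the diff, and the `ollama` provider_id is only an illustrative placeholder.

```python
# Minimal sketch (not from the commit): call the new Providers endpoints over HTTP.
# Assumes a Llama Stack server on localhost:8321; "ollama" is a placeholder provider_id.
import httpx

base_url = "http://localhost:8321"

# GET /v1/providers -> ListProvidersResponse
providers = httpx.get(f"{base_url}/v1/providers").json()
for p in providers["data"]:
    print(p["api"], p["provider_id"], p["provider_type"])

# GET /v1/providers/{provider_id} -> ProviderInfo, including the (redacted) config
info = httpx.get(f"{base_url}/v1/providers/ollama").json()
print(info["config"])
```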


@ -10,7 +10,7 @@ import json
import os
import shutil
from dataclasses import dataclass
-from datetime import datetime
+from datetime import datetime, timezone
from functools import partial
from pathlib import Path
from typing import Dict, List, Optional
@ -404,7 +404,7 @@ def _download_from_manifest(manifest_file: str, max_concurrent_downloads: int):
d = json.load(f)
manifest = Manifest(**d)
-if datetime.now() > manifest.expires_on:
+if datetime.now(timezone.utc) > manifest.expires_on:
raise ValueError(f"Manifest URLs have expired on {manifest.expires_on}")
console = Console()


@ -41,8 +41,14 @@ class ModelPromptFormat(Subcommand):
"-m", "-m",
"--model-name", "--model-name",
type=str, type=str,
default="llama3_1", help="Example: Llama3.1-8B or Llama3.2-11B-Vision, etc\n"
help="Model Family (llama3_1, llama3_X, etc.)", "(Run `llama model list` to see a list of valid model names)",
)
self.parser.add_argument(
"-l",
"--list",
action="store_true",
help="List all available models",
) )
self.parser.add_argument( self.parser.add_argument(
"-l", "-l",
@ -60,7 +66,6 @@ class ModelPromptFormat(Subcommand):
] ]
model_list = [m.value for m in supported_model_ids] model_list = [m.value for m in supported_model_ids]
model_str = "\n".join(model_list)
if args.list: if args.list:
headers = ["Model(s)"] headers = ["Model(s)"]
@ -81,10 +86,16 @@ class ModelPromptFormat(Subcommand):
try: try:
model_id = CoreModelId(args.model_name) model_id = CoreModelId(args.model_name)
except ValueError: except ValueError:
self.parser.error(f"{args.model_name} is not a valid Model. Choose one from --\n{model_str}") self.parser.error(
f"{args.model_name} is not a valid Model. Choose one from the list of valid models. "
f"Run `llama model list` to see the valid model names."
)
if model_id not in supported_model_ids: if model_id not in supported_model_ids:
self.parser.error(f"{model_id} is not a valid Model. Choose one from --\n {model_str}") self.parser.error(
f"{model_id} is not a valid Model. Choose one from the list of valid models. "
f"Run `llama model list` to see the valid model names."
)
llama_3_1_file = ROOT_DIR / "models" / "llama" / "llama3_1" / "prompt_format.md" llama_3_1_file = ROOT_DIR / "models" / "llama" / "llama3_1" / "prompt_format.md"
llama_3_2_text_file = ROOT_DIR / "models" / "llama" / "llama3_2" / "text_prompt_format.md" llama_3_2_text_file = ROOT_DIR / "models" / "llama" / "llama3_2" / "text_prompt_format.md"


@ -62,7 +62,7 @@ def configure_api_providers(config: StackRunConfig, build_spec: DistributionSpec
if config.apis:
apis_to_serve = config.apis
else:
-apis_to_serve = [a.value for a in Api if a not in (Api.telemetry, Api.inspect)]
+apis_to_serve = [a.value for a in Api if a not in (Api.telemetry, Api.inspect, Api.providers)]
for api_str in apis_to_serve:
api = Api(api_str)


@ -117,6 +117,14 @@ class Provider(BaseModel):
config: Dict[str, Any]
+class LoggingConfig(BaseModel):
+category_levels: Dict[str, str] = Field(
+default_factory=Dict,
+description="""
+Dictionary of different logging configurations for different portions (ex: core, server) of llama stack""",
+)
class ServerConfig(BaseModel):
port: int = Field(
default=8321,
@ -176,6 +184,8 @@ a default SQLite store will be used.""",
benchmarks: List[BenchmarkInput] = Field(default_factory=list)
tool_groups: List[ToolGroupInput] = Field(default_factory=list)
+logging: Optional[LoggingConfig] = Field(default=None, description="Configuration for Llama Stack Logging")
server: ServerConfig = Field(
default_factory=ServerConfig,
description="Configuration for the HTTP(S) server",


@ -56,7 +56,7 @@ def builtin_automatically_routed_apis() -> List[AutoRoutedApiInfo]:
def providable_apis() -> List[Api]:
routing_table_apis = {x.routing_table_api for x in builtin_automatically_routed_apis()}
-return [api for api in Api if api not in routing_table_apis and api != Api.inspect]
+return [api for api in Api if api not in routing_table_apis and api != Api.inspect and api != Api.providers]
def get_provider_registry() -> Dict[Api, Dict[str, ProviderSpec]]:


@ -0,0 +1,59 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from pydantic import BaseModel
from llama_stack.apis.providers import ListProvidersResponse, ProviderInfo, Providers
from .datatypes import StackRunConfig
from .stack import redact_sensitive_fields
class ProviderImplConfig(BaseModel):
run_config: StackRunConfig
async def get_provider_impl(config, deps):
impl = ProviderImpl(config, deps)
await impl.initialize()
return impl
class ProviderImpl(Providers):
def __init__(self, config, deps):
self.config = config
self.deps = deps
async def initialize(self) -> None:
pass
async def list_providers(self) -> ListProvidersResponse:
run_config = self.config.run_config
safe_config = StackRunConfig(**redact_sensitive_fields(run_config.model_dump()))
ret = []
for api, providers in safe_config.providers.items():
ret.extend(
[
ProviderInfo(
api=api,
provider_id=p.provider_id,
provider_type=p.provider_type,
config=p.config,
)
for p in providers
]
)
return ListProvidersResponse(data=ret)
async def inspect_provider(self, provider_id: str) -> ProviderInfo:
all_providers = await self.list_providers()
for p in all_providers.data:
if p.provider_id == provider_id:
return p
raise ValueError(f"Provider {provider_id} not found")


@ -16,6 +16,7 @@ from llama_stack.apis.inference import Inference
from llama_stack.apis.inspect import Inspect
from llama_stack.apis.models import Models
from llama_stack.apis.post_training import PostTraining
+from llama_stack.apis.providers import Providers as ProvidersAPI
from llama_stack.apis.safety import Safety
from llama_stack.apis.scoring import Scoring
from llama_stack.apis.scoring_functions import ScoringFunctions
@ -59,6 +60,7 @@ class InvalidProviderError(Exception):
def api_protocol_map() -> Dict[Api, Any]:
return {
+Api.providers: ProvidersAPI,
Api.agents: Agents,
Api.inference: Inference,
Api.inspect: Inspect,
@ -247,6 +249,25 @@ def sort_providers_by_deps(
)
)
+sorted_providers.append(
+(
+"providers",
+ProviderWithSpec(
+provider_id="__builtin__",
+provider_type="__builtin__",
+config={"run_config": run_config.model_dump()},
+spec=InlineProviderSpec(
+api=Api.providers,
+provider_type="__builtin__",
+config_class="llama_stack.distribution.providers.ProviderImplConfig",
+module="llama_stack.distribution.providers",
+api_dependencies=apis,
+deps__=[x.value for x in apis],
+),
+),
+)
+)
logger.debug(f"Resolved {len(sorted_providers)} providers")
for api_str, provider in sorted_providers:
logger.debug(f" {api_str} => {provider.provider_id}")


@ -25,7 +25,7 @@ from fastapi.responses import JSONResponse, StreamingResponse
from pydantic import BaseModel, ValidationError
from typing_extensions import Annotated
-from llama_stack.distribution.datatypes import StackRunConfig
+from llama_stack.distribution.datatypes import LoggingConfig, StackRunConfig
from llama_stack.distribution.distribution import builtin_automatically_routed_apis
from llama_stack.distribution.request_headers import (
PROVIDER_DATA_VAR,
@ -306,34 +306,42 @@ def main():
args = parser.parse_args()
-if args.env:
-for env_pair in args.env:
-try:
-key, value = validate_env_pair(env_pair)
-logger.info(f"Setting CLI environment variable {key} => {value}")
-os.environ[key] = value
-except ValueError as e:
-logger.error(f"Error: {str(e)}")
-sys.exit(1)
+log_line = ""
if args.yaml_config:
# if the user provided a config file, use it, even if template was specified
config_file = Path(args.yaml_config)
if not config_file.exists():
raise ValueError(f"Config file {config_file} does not exist")
-logger.info(f"Using config file: {config_file}")
+log_line = f"Using config file: {config_file}"
elif args.template:
config_file = Path(REPO_ROOT) / "llama_stack" / "templates" / args.template / "run.yaml"
if not config_file.exists():
raise ValueError(f"Template {args.template} does not exist")
-logger.info(f"Using template {args.template} config file: {config_file}")
+log_line = f"Using template {args.template} config file: {config_file}"
else:
raise ValueError("Either --yaml-config or --template must be provided")
+logger_config = None
with open(config_file, "r") as fp:
-config = replace_env_vars(yaml.safe_load(fp))
+config_contents = yaml.safe_load(fp)
+if isinstance(config_contents, dict) and (cfg := config_contents.get("logging_config")):
+logger_config = LoggingConfig(**cfg)
+logger = get_logger(name=__name__, category="server", config=logger_config)
+if args.env:
+for env_pair in args.env:
+try:
+key, value = validate_env_pair(env_pair)
+logger.info(f"Setting CLI environment variable {key} => {value}")
+os.environ[key] = value
+except ValueError as e:
+logger.error(f"Error: {str(e)}")
+sys.exit(1)
+config = replace_env_vars(config_contents)
config = StackRunConfig(**config)
+# now that the logger is initialized, print the line about which type of config we are using.
+logger.info(log_line)
logger.info("Run configuration:")
safe_config = redact_sensitive_fields(config.model_dump())
logger.info(yaml.dump(safe_config, indent=2))
@ -368,6 +376,7 @@ def main():
apis_to_serve.add(inf.routing_table_api.value)
apis_to_serve.add("inspect")
+apis_to_serve.add("providers")
for api_str in apis_to_serve:
api = Api(api_str)
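A rough illustration of the new startup path: the server now reads an optional `logging_config` block from the run config before the logger is created. The YAML fragment below is hypothetical (no shipped template adding it is shown in this diff); the category names follow the `(ex: core, server)` hint in `LoggingConfig`.

```python
# Sketch of the new logger bootstrap in server.py, using a hypothetical YAML fragment.
import yaml

from llama_stack.distribution.datatypes import LoggingConfig

config_contents = yaml.safe_load(
    """
logging_config:
  category_levels:
    server: DEBUG
    core: INFO
"""
)

logger_config = None
if isinstance(config_contents, dict) and (cfg := config_contents.get("logging_config")):
    logger_config = LoggingConfig(**cfg)
# get_logger(name=__name__, category="server", config=logger_config) would then apply these levels
```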


@ -23,6 +23,7 @@ from llama_stack.apis.inference import Inference
from llama_stack.apis.inspect import Inspect
from llama_stack.apis.models import Models
from llama_stack.apis.post_training import PostTraining
+from llama_stack.apis.providers import Providers
from llama_stack.apis.safety import Safety
from llama_stack.apis.scoring import Scoring
from llama_stack.apis.scoring_functions import ScoringFunctions
@ -44,6 +45,7 @@ logger = get_logger(name=__name__, category="core")
class LlamaStack(
+Providers,
VectorDBs,
Inference,
BatchInference,


@ -7,13 +7,15 @@
import logging
import os
from logging.config import dictConfig
-from typing import Dict
+from typing import Dict, Optional
from rich.console import Console
from rich.errors import MarkupError
from rich.logging import RichHandler
from termcolor import cprint
+from .distribution.datatypes import LoggingConfig
# Default log level
DEFAULT_LOG_LEVEL = logging.INFO
@ -34,6 +36,56 @@ CATEGORIES = [
_category_levels: Dict[str, int] = {category: DEFAULT_LOG_LEVEL for category in CATEGORIES}
+def config_to_category_levels(category: str, level: str):
+"""
+Helper function to be called either by environment parsing or yaml parsing to go from a list of categories and levels to a dictionary ready to be
+used by the logger dictConfig.
+Parameters:
+category (str): logging category to apply the level to
+level (str): logging level to be used in the category
+Returns:
+Dict[str, int]: A dictionary mapping categories to their log levels.
+"""
+category_levels: Dict[str, int] = {}
+level_value = logging._nameToLevel.get(str(level).upper())
+if level_value is None:
+logging.warning(f"Unknown log level '{level}' for category '{category}'. Falling back to default 'INFO'.")
+return category_levels
+if category == "all":
+# Apply the log level to all categories and the root logger
+for cat in CATEGORIES:
+category_levels[cat] = level_value
+# Set the root logger's level to the specified level
+category_levels["root"] = level_value
+elif category in CATEGORIES:
+category_levels[category] = level_value
+logging.info(f"Setting '{category}' category to level '{level}'.")
+else:
+logging.warning(f"Unknown logging category: {category}. No changes made.")
+return category_levels
+def parse_yaml_config(yaml_config: LoggingConfig) -> Dict[str, int]:
+"""
+Helper function to parse a yaml logging configuration found in the run.yaml
+Parameters:
+yaml_config (Logging): the logger config object found in the run.yaml
+Returns:
+Dict[str, int]: A dictionary mapping categories to their log levels.
+"""
+category_levels = {}
+for category, level in yaml_config.category_levels.items():
+category_levels.update(config_to_category_levels(category=category, level=level))
+return category_levels
def parse_environment_config(env_config: str) -> Dict[str, int]:
"""
Parse the LLAMA_STACK_LOGGING environment variable and return a dictionary of category log levels.
@ -53,25 +105,7 @@ def parse_environment_config(env_config: str) -> Dict[str, int]:
category, level = pair.split("=", 1)
category = category.strip().lower()
level = level.strip().upper()  # Convert to uppercase for logging._nameToLevel
-level_value = logging._nameToLevel.get(level)
-if level_value is None:
-logging.warning(
-f"Unknown log level '{level}' for category '{category}'. Falling back to default 'INFO'."
-)
-continue
-if category == "all":
-# Apply the log level to all categories and the root logger
-for cat in CATEGORIES:
-category_levels[cat] = level_value
-# Set the root logger's level to the specified level
-category_levels["root"] = level_value
-elif category in CATEGORIES:
-category_levels[category] = level_value
-logging.info(f"Setting '{category}' category to level '{level}'.")
-else:
-logging.warning(f"Unknown logging category: {category}. No changes made.")
+category_levels.update(config_to_category_levels(category=category, level=level))
except ValueError:
logging.warning(f"Invalid logging configuration: '{pair}'. Expected format: 'category=level'.")
@ -176,7 +210,9 @@ def setup_logging(category_levels: Dict[str, int], log_file: str | None) -> None
logger.setLevel(root_level)
-def get_logger(name: str, category: str = "uncategorized") -> logging.LoggerAdapter:
+def get_logger(
+name: str, category: str = "uncategorized", config: Optional[LoggingConfig] | None = None
+) -> logging.LoggerAdapter:
"""
Returns a logger with the specified name and category.
If no category is provided, defaults to 'uncategorized'.
@ -184,10 +220,14 @@ def get_logger(name: str, category: str = "uncategorized") -> logging.LoggerAdap
Parameters:
name (str): The name of the logger (e.g., module or filename).
category (str): The category of the logger (default 'uncategorized').
+config (Logging): optional yaml config to override the existing logger configuration
Returns:
logging.LoggerAdapter: Configured logger with category support.
"""
+if config:
+_category_levels.update(parse_yaml_config(config))
logger = logging.getLogger(name)
logger.setLevel(_category_levels.get(category, DEFAULT_LOG_LEVEL))
return logging.LoggerAdapter(logger, {"category": category})
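A small usage sketch of the extended `get_logger` signature (assuming the module lives at `llama_stack.log`, which the relative import of `.distribution.datatypes` above suggests; the category and level values are arbitrary examples):

```python
# Sketch: get_logger now accepts an optional LoggingConfig that overrides category levels.
from llama_stack.distribution.datatypes import LoggingConfig
from llama_stack.log import get_logger

logger = get_logger(
    name=__name__,
    category="server",
    config=LoggingConfig(category_levels={"all": "DEBUG"}),
)
logger.info("category levels applied from config")
```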


@ -34,7 +34,9 @@ class SystemDefaultGenerator(PromptTemplateGeneratorBase):
)
return PromptTemplate(
template_str.lstrip("\n"),
-{"today": datetime.now().strftime("%d %B %Y")},
+{
+"today": datetime.now().strftime("%d %B %Y")  # noqa: DTZ005 - we don't care about timezones here since we are displaying the date
+},
)
def data_examples(self) -> List[Any]:


@ -11,7 +11,7 @@ import re
import secrets
import string
import uuid
-from datetime import datetime
+from datetime import datetime, timezone
from typing import AsyncGenerator, List, Optional, Union
from urllib.parse import urlparse
@ -239,7 +239,7 @@ class ChatAgent(ShieldRunnerMixin):
in_progress_tool_call_step = await self.storage.get_in_progress_tool_call_step(
request.session_id, request.turn_id
)
-now = datetime.now().astimezone().isoformat()
+now = datetime.now(timezone.utc).isoformat()
tool_execution_step = ToolExecutionStep(
step_id=(in_progress_tool_call_step.step_id if in_progress_tool_call_step else str(uuid.uuid4())),
turn_id=request.turn_id,
@ -264,7 +264,7 @@ class ChatAgent(ShieldRunnerMixin):
start_time = last_turn.started_at
else:
messages.extend(request.messages)
-start_time = datetime.now().astimezone().isoformat()
+start_time = datetime.now(timezone.utc).isoformat()
input_messages = request.messages
output_message = None
@ -295,7 +295,7 @@ class ChatAgent(ShieldRunnerMixin):
input_messages=input_messages,
output_message=output_message,
started_at=start_time,
-completed_at=datetime.now().astimezone().isoformat(),
+completed_at=datetime.now(timezone.utc).isoformat(),
steps=steps,
)
await self.storage.add_turn_to_session(request.session_id, turn)
@ -386,7 +386,7 @@ class ChatAgent(ShieldRunnerMixin):
return
step_id = str(uuid.uuid4())
-shield_call_start_time = datetime.now().astimezone().isoformat()
+shield_call_start_time = datetime.now(timezone.utc).isoformat()
try:
yield AgentTurnResponseStreamChunk(
event=AgentTurnResponseEvent(
@ -410,7 +410,7 @@ class ChatAgent(ShieldRunnerMixin):
turn_id=turn_id,
violation=e.violation,
started_at=shield_call_start_time,
-completed_at=datetime.now().astimezone().isoformat(),
+completed_at=datetime.now(timezone.utc).isoformat(),
),
)
)
@ -433,7 +433,7 @@ class ChatAgent(ShieldRunnerMixin):
turn_id=turn_id,
violation=None,
started_at=shield_call_start_time,
-completed_at=datetime.now().astimezone().isoformat(),
+completed_at=datetime.now(timezone.utc).isoformat(),
),
)
)
@ -472,7 +472,7 @@ class ChatAgent(ShieldRunnerMixin):
client_tools[tool.name] = tool
while True:
step_id = str(uuid.uuid4())
-inference_start_time = datetime.now().astimezone().isoformat()
+inference_start_time = datetime.now(timezone.utc).isoformat()
yield AgentTurnResponseStreamChunk(
event=AgentTurnResponseEvent(
payload=AgentTurnResponseStepStartPayload(
@ -582,7 +582,7 @@ class ChatAgent(ShieldRunnerMixin):
turn_id=turn_id,
model_response=copy.deepcopy(message),
started_at=inference_start_time,
-completed_at=datetime.now().astimezone().isoformat(),
+completed_at=datetime.now(timezone.utc).isoformat(),
),
)
)
@ -653,7 +653,7 @@ class ChatAgent(ShieldRunnerMixin):
turn_id=turn_id,
tool_calls=[tool_call],
tool_responses=[],
-started_at=datetime.now().astimezone().isoformat(),
+started_at=datetime.now(timezone.utc).isoformat(),
),
)
yield message
@ -670,7 +670,7 @@ class ChatAgent(ShieldRunnerMixin):
"input": message.model_dump_json(),
},
) as span:
-tool_execution_start_time = datetime.now().astimezone().isoformat()
+tool_execution_start_time = datetime.now(timezone.utc).isoformat()
tool_call = message.tool_calls[0]
tool_result = await self.execute_tool_call_maybe(
session_id,
@ -708,7 +708,7 @@ class ChatAgent(ShieldRunnerMixin):
)
],
started_at=tool_execution_start_time,
-completed_at=datetime.now().astimezone().isoformat(),
+completed_at=datetime.now(timezone.utc).isoformat(),
),
)
)


@ -7,7 +7,7 @@
import json
import logging
import uuid
-from datetime import datetime
+from datetime import datetime, timezone
from typing import List, Optional
from pydantic import BaseModel
@ -36,7 +36,7 @@ class AgentPersistence:
session_info = AgentSessionInfo(
session_id=session_id,
session_name=name,
-started_at=datetime.now(),
+started_at=datetime.now(timezone.utc),
)
await self.kvstore.set(
key=f"session:{self.agent_id}:{session_id}",


@ -12,7 +12,7 @@ from llama_stack.apis.agents import Agents, StepType
from llama_stack.apis.benchmarks import Benchmark
from llama_stack.apis.datasetio import DatasetIO
from llama_stack.apis.datasets import Datasets
-from llama_stack.apis.inference import Inference, UserMessage
+from llama_stack.apis.inference import Inference, SystemMessage, UserMessage
from llama_stack.apis.scoring import Scoring
from llama_stack.distribution.datatypes import Api
from llama_stack.providers.datatypes import BenchmarksProtocolPrivate
@ -118,7 +118,7 @@ class MetaReferenceEvalImpl(
for i, x in tqdm(enumerate(input_rows)):
assert ColumnName.chat_completion_input.value in x, "Invalid input row"
input_messages = json.loads(x[ColumnName.chat_completion_input.value])
-input_messages = [UserMessage(**x) for x in input_messages]
+input_messages = [UserMessage(**x) for x in input_messages if x["role"] == "user"]
# NOTE: only single-turn agent generation is supported. Create a new session for each input row
session_create_response = await self.agents_api.create_agent_session(agent_id, f"session-{i}")
@ -168,10 +168,11 @@ class MetaReferenceEvalImpl(
generations.append({ColumnName.generated_answer.value: response.completion_message.content})
elif ColumnName.chat_completion_input.value in x:
chat_completion_input_json = json.loads(x[ColumnName.chat_completion_input.value])
-input_messages = [UserMessage(**x) for x in chat_completion_input_json]
+input_messages = [UserMessage(**x) for x in chat_completion_input_json if x["role"] == "user"]
messages = []
if candidate.system_message:
messages.append(candidate.system_message)
+messages += [SystemMessage(**x) for x in chat_completion_input_json if x["role"] == "system"]
messages += input_messages
response = await self.inference_api.chat_completion(
model_id=candidate.model,
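To make the role-aware filtering above concrete, here is a standalone sketch of how a mixed system/user chat input is now split into messages (the sample rows are invented; the expressions mirror the diff):

```python
# Standalone sketch of the role-based message construction; sample data is made up.
from llama_stack.apis.inference import SystemMessage, UserMessage

chat_completion_input_json = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is 2 + 2?"},
]

input_messages = [UserMessage(**m) for m in chat_completion_input_json if m["role"] == "user"]
messages = [SystemMessage(**m) for m in chat_completion_input_json if m["role"] == "system"]
messages += input_messages  # system messages first, then user messages
```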


@ -3,7 +3,7 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-from datetime import datetime
+from datetime import datetime, timezone
from typing import Any, Dict, Optional
from llama_stack.apis.datasetio import DatasetIO
@ -64,7 +64,7 @@ class TorchtunePostTrainingImpl:
job_status_response = PostTrainingJobStatusResponse(
job_uuid=job_uuid,
status=JobStatus.scheduled,
-scheduled_at=datetime.now(),
+scheduled_at=datetime.now(timezone.utc),
)
self.jobs[job_uuid] = job_status_response
@ -84,7 +84,7 @@ class TorchtunePostTrainingImpl:
)
job_status_response.status = JobStatus.in_progress
-job_status_response.started_at = datetime.now()
+job_status_response.started_at = datetime.now(timezone.utc)
await recipe.setup()
resources_allocated, checkpoints = await recipe.train()
@ -93,7 +93,7 @@ class TorchtunePostTrainingImpl:
job_status_response.resources_allocated = resources_allocated
job_status_response.checkpoints = checkpoints
job_status_response.status = JobStatus.completed
-job_status_response.completed_at = datetime.now()
+job_status_response.completed_at = datetime.now(timezone.utc)
except Exception:
job_status_response.status = JobStatus.failed


@ -8,7 +8,7 @@ import gc
import logging
import os
import time
-from datetime import datetime
+from datetime import datetime, timezone
from functools import partial
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
@ -532,7 +532,7 @@ class LoraFinetuningSingleDevice:
checkpoint_path = await self.save_checkpoint(epoch=curr_epoch)
checkpoint = Checkpoint(
identifier=f"{self.model_id}-sft-{curr_epoch}",
-created_at=datetime.now(),
+created_at=datetime.now(timezone.utc),
epoch=curr_epoch,
post_training_job_id=self.job_uuid,
path=checkpoint_path,


@ -22,12 +22,19 @@ from llama_stack.providers.utils.common.data_schema_validator import (
)
from .config import BasicScoringConfig
+from .scoring_fn.bfcl_scoring_fn import BFCLScoringFn
from .scoring_fn.equality_scoring_fn import EqualityScoringFn
from .scoring_fn.regex_parser_math_response_scoring_fn import RegexParserMathResponseScoringFn
from .scoring_fn.regex_parser_scoring_fn import RegexParserScoringFn
from .scoring_fn.subset_of_scoring_fn import SubsetOfScoringFn
-FIXED_FNS = [EqualityScoringFn, SubsetOfScoringFn, RegexParserScoringFn, RegexParserMathResponseScoringFn]
+FIXED_FNS = [
+EqualityScoringFn,
+SubsetOfScoringFn,
+RegexParserScoringFn,
+RegexParserMathResponseScoringFn,
+BFCLScoringFn,
+]
class BasicScoringImpl(


@ -0,0 +1,93 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import json
import re
from typing import Any, Dict, Optional
from llama_stack.apis.scoring import ScoringResultRow
from llama_stack.apis.scoring_functions import ScoringFnParams
from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
from ..utils.bfcl.ast_parser import decode_ast
from ..utils.bfcl.checker import ast_checker, is_empty_output
from .fn_defs.bfcl import bfcl
def postprocess(x: Dict[str, Any], test_category: str) -> Dict[str, Any]:
contain_func_call = False
error = None
error_type = None
checker_result = {}
try:
prediction = decode_ast(x["generated_answer"], x["language"]) or ""
contain_func_call = True
# if not is_function_calling_format_output(prediction):
if is_empty_output(prediction):
contain_func_call = False
error = "Did not output in the specified format. Note: the model_result is wrapped in a string to ensure json serializability."
error_type = "ast_decoder:decoder_wrong_output_format"
else:
checker_result = ast_checker(
json.loads(x["function"]),
prediction,
json.loads(x["ground_truth"]),
x["language"],
test_category=test_category,
model_name="",
)
except Exception as e:
prediction = ""
error = f"Invalid syntax. Failed to decode AST. {str(e)}"
error_type = "ast_decoder:decoder_failed"
return {
"prediction": prediction,
"contain_func_call": contain_func_call,
"valid": checker_result.get("valid", False),
"error": error or checker_result.get("error", ""),
"error_type": error_type or checker_result.get("error_type", ""),
}
def gen_valid(x: Dict[str, Any]) -> Dict[str, float]:
return {"valid": x["valid"]}
def gen_relevance_acc(x: Dict[str, Any]) -> Dict[str, float]:
# This function serves both the relevance and irrelevance tests, which use exactly opposite logic.
# If `test_category` is "irrelevance", the model is expected to output no function call.
# No function call means either the AST decoding fails (an error message is generated) or the decoded AST does not contain any function call (such as an empty list, `[]`).
# If `test_category` is "relevance", the model is expected to output a function call, and an empty list doesn't count as a function call.
acc = not x["contain_func_call"] if "irrelevance" in x["id"] else x["contain_func_call"]
return {"valid": float(acc)}
class BFCLScoringFn(RegisteredBaseScoringFn):
"""
A scoring_fn for BFCL
"""
def __init__(self, *args, **kwargs) -> None:
super().__init__(*args, **kwargs)
self.supported_fn_defs_registry = {
bfcl.identifier: bfcl,
}
async def score_row(
self,
input_row: Dict[str, Any],
scoring_fn_identifier: Optional[str] = "bfcl",
scoring_params: Optional[ScoringFnParams] = None,
) -> ScoringResultRow:
test_category = re.sub(r"_[0-9_-]+$", "", input_row["id"])
score_result = postprocess(input_row, test_category)
if test_category in {"irrelevance", "live_relevance", "live_irrelevance"}:
score = gen_relevance_acc(score_result)["valid"]
else:
score = gen_valid(score_result)["valid"]
return {
"score": float(score),
}
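# --- Editor's sketch (not part of the original file): how a BFCL row flows through this scorer. ---
# The row below is hypothetical; the field names follow the schema used above
# (`id`, `language`, `generated_answer`, `function`, `ground_truth`).
_example_row = {
    "id": "simple_200",
    "language": "Python",
    "generated_answer": '[get_weather(city="SF")]',
    "function": '[{"name": "get_weather", "parameters": {"properties": {"city": {"type": "string"}}, "required": ["city"]}}]',
    "ground_truth": '[{"get_weather": {"city": ["SF", "San Francisco"]}}]',
}
# postprocess(_example_row, "simple") decodes the generated call, runs ast_checker against the
# ground truth, and returns {"valid": True, ...}; score_row then maps that to {"score": 1.0}.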


@ -0,0 +1,21 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.apis.common.type_system import NumberType
from llama_stack.apis.scoring_functions import (
AggregationFunctionType,
BasicScoringFnParams,
ScoringFn,
)
bfcl = ScoringFn(
identifier="basic::bfcl",
description="BFCL complex scoring",
return_type=NumberType(),
provider_id="basic",
provider_resource_id="bfcl",
params=BasicScoringFnParams(aggregation_functions=[AggregationFunctionType.accuracy]),
)
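# Editor's note (sketch): `AggregationFunctionType.accuracy` is assumed here to average the per-row
# "score" values emitted by BFCLScoringFn into a single accuracy figure, roughly:
#
#     accuracy = sum(row["score"] for row in score_rows) / len(score_rows)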


@ -0,0 +1,5 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.


@ -0,0 +1,296 @@
# ruff: noqa
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import ast
from .tree_sitter import get_parser
def parse_java_function_call(source_code):
if not source_code.endswith(";"):
source_code += ";" # Necessary for the parser not to register an error
parser = get_parser("java")
tree = parser.parse(bytes(source_code, "utf8"))
root_node = tree.root_node
if root_node.has_error:
raise Exception("Error parsing java the source code.")
def get_text(node):
"""Returns the text represented by the node."""
return source_code[node.start_byte : node.end_byte]
def traverse_node(node, nested=False):
if node.type == "string_literal":
if nested:
return get_text(node)
# Strip surrounding quotes from string literals
return get_text(node)[1:-1]
elif node.type == "character_literal":
if nested:
return get_text(node)
# Strip surrounding single quotes from character literals
return get_text(node)[1:-1]
"""Traverse the node to collect texts for complex structures."""
if node.type in [
"identifier",
"class_literal",
"type_identifier",
"method_invocation",
]:
return get_text(node)
elif node.type == "array_creation_expression":
# Handle array creation expression specifically
type_node = node.child_by_field_name("type")
value_node = node.child_by_field_name("value")
type_text = traverse_node(type_node, True)
value_text = traverse_node(value_node, True)
return f"new {type_text}[]{value_text}"
elif node.type == "object_creation_expression":
# Handle object creation expression specifically
type_node = node.child_by_field_name("type")
arguments_node = node.child_by_field_name("arguments")
type_text = traverse_node(type_node, True)
if arguments_node:
# Process each argument carefully, avoiding unnecessary punctuation
argument_texts = []
for child in arguments_node.children:
if child.type not in [
",",
"(",
")",
]: # Exclude commas and parentheses
argument_text = traverse_node(child, True)
argument_texts.append(argument_text)
arguments_text = ", ".join(argument_texts)
return f"new {type_text}({arguments_text})"
else:
return f"new {type_text}()"
elif node.type == "set":
# Handling sets specifically
items = [traverse_node(n, True) for n in node.children if n.type not in [",", "set"]]
return "{" + ", ".join(items) + "}"
elif node.child_count > 0:
return "".join(traverse_node(child, True) for child in node.children)
else:
return get_text(node)
def extract_arguments(args_node):
arguments = {}
for child in args_node.children:
if child.type == "assignment_expression":
# For named parameters
name_node, value_node = child.children[0], child.children[2]
name = get_text(name_node)
value = traverse_node(value_node)
if name in arguments:
if not isinstance(arguments[name], list):
arguments[name] = [arguments[name]]
arguments[name].append(value)
else:
arguments[name] = value
# arguments.append({'name': name, 'value': value})
elif child.type in ["identifier", "class_literal", "set"]:
# For unnamed parameters and handling sets
value = traverse_node(child)
if None in arguments:
if not isinstance(arguments[None], list):
arguments[None] = [arguments[None]]
arguments[None].append(value)
else:
arguments[None] = value
return arguments
def traverse(node):
if node.type == "method_invocation":
# Extract the function name and its arguments
method_name = get_text(node.child_by_field_name("name"))
class_name_node = node.child_by_field_name("object")
if class_name_node:
class_name = get_text(class_name_node)
function_name = f"{class_name}.{method_name}"
else:
function_name = method_name
arguments_node = node.child_by_field_name("arguments")
if arguments_node:
arguments = extract_arguments(arguments_node)
for key, value in arguments.items():
if isinstance(value, list):
raise Exception("Error: Multiple arguments with the same name are not supported.")
return [{function_name: arguments}]
else:
for child in node.children:
result = traverse(child)
if result:
return result
result = traverse(root_node)
return result if result else {}
def parse_javascript_function_call(source_code):
if not source_code.endswith(";"):
source_code += ";" # Necessary for the parser not to register an error
parser = get_parser("javascript")
# Parse the source code
tree = parser.parse(bytes(source_code, "utf8"))
root_node = tree.root_node
if root_node.has_error:
raise Exception("Error js parsing the source code.")
# Function to recursively extract argument details
def extract_arguments(node):
args = {}
for child in node.children:
if child.type == "assignment_expression":
# Extract left (name) and right (value) parts of the assignment
name = child.children[0].text.decode("utf-8")
value = child.children[2].text.decode("utf-8")
if (value.startswith('"') and value.endswith('"')) or (value.startswith("'") and value.endswith("'")):
value = value[1:-1] # Trim the quotation marks
if name in args:
if not isinstance(args[name], list):
args[name] = [args[name]]
args[name].append(value)
else:
args[name] = value
elif child.type == "identifier" or child.type == "true":
# Handle non-named arguments and boolean values
value = child.text.decode("utf-8")
if None in args:
if not isinstance(args[None], list):
args[None] = [args[None]]
args[None].append(value)
else:
args[None] = value
return args
# Find the function call and extract its name and arguments
if root_node.type == "program":
for child in root_node.children:
if child.type == "expression_statement":
for sub_child in child.children:
if sub_child.type == "call_expression":
function_name = sub_child.children[0].text.decode("utf8")
arguments_node = sub_child.children[1]
parameters = extract_arguments(arguments_node)
for key, value in parameters.items():
if isinstance(value, list):
raise Exception("Error: Multiple arguments with the same name are not supported.")
result = [{function_name: parameters}]
return result
def ast_parse(input_str, language="Python"):
if language == "Python":
cleaned_input = input_str.strip("[]'")
parsed = ast.parse(cleaned_input, mode="eval")
extracted = []
if isinstance(parsed.body, ast.Call):
extracted.append(resolve_ast_call(parsed.body))
else:
for elem in parsed.body.elts:
extracted.append(resolve_ast_call(elem))
return extracted
elif language == "Java":
return parse_java_function_call(input_str[1:-1]) # Remove the [ and ] from the string
elif language == "JavaScript":
return parse_javascript_function_call(input_str[1:-1])
else:
raise NotImplementedError(f"Unsupported language: {language}")
def resolve_ast_call(elem):
# Handle nested attributes for deeply nested module paths
func_parts = []
func_part = elem.func
while isinstance(func_part, ast.Attribute):
func_parts.append(func_part.attr)
func_part = func_part.value
if isinstance(func_part, ast.Name):
func_parts.append(func_part.id)
func_name = ".".join(reversed(func_parts))
args_dict = {}
# Parse when args are simply passed as an unnamed dictionary arg
for arg in elem.args:
if isinstance(arg, ast.Dict):
for key, value in zip(arg.keys, arg.values):
if isinstance(key, ast.Constant):
arg_name = key.value
output = resolve_ast_by_type(value)
args_dict[arg_name] = output
for arg in elem.keywords:
output = resolve_ast_by_type(arg.value)
args_dict[arg.arg] = output
return {func_name: args_dict}
def resolve_ast_by_type(value):
if isinstance(value, ast.Constant):
if value.value is Ellipsis:
output = "..."
else:
output = value.value
elif isinstance(value, ast.UnaryOp):
output = -value.operand.value
elif isinstance(value, ast.List):
output = [resolve_ast_by_type(v) for v in value.elts]
elif isinstance(value, ast.Dict):
output = {resolve_ast_by_type(k): resolve_ast_by_type(v) for k, v in zip(value.keys, value.values)}
elif isinstance(value, ast.NameConstant): # Added this condition to handle boolean values
output = value.value
elif isinstance(value, ast.BinOp): # Added this condition to handle function calls as arguments
output = eval(ast.unparse(value))
elif isinstance(value, ast.Name):
output = value.id
elif isinstance(value, ast.Call):
if len(value.keywords) == 0:
output = ast.unparse(value)
else:
output = resolve_ast_call(value)
elif isinstance(value, ast.Tuple):
output = tuple(resolve_ast_by_type(v) for v in value.elts)
elif isinstance(value, ast.Lambda):
output = eval(ast.unparse(value.body[0].value))
elif isinstance(value, ast.Ellipsis):
output = "..."
elif isinstance(value, ast.Subscript):
try:
output = ast.unparse(value.body[0].value)
except:
output = ast.unparse(value.value) + "[" + ast.unparse(value.slice) + "]"
else:
raise Exception(f"Unsupported AST type: {type(value)}")
return output
def decode_ast(result, language="Python"):
func = result
func = func.replace("\n", "") # remove new line characters
if not func.startswith("["):
func = "[" + func
if not func.endswith("]"):
func = func + "]"
decoded_output = ast_parse(func, language)
return decoded_output
def decode_execute(result):
func = result
func = func.replace("\n", "") # remove new line characters
if not func.startswith("["):
func = "[" + func
if not func.endswith("]"):
func = func + "]"
decode_output = ast_parse(func)
execution_list = []
for function_call in decode_output:
for key, value in function_call.items():
execution_list.append(f"{key}({','.join([f'{k}={repr(v)}' for k, v in value.items()])})")
return execution_list
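# --- Editor's sketch (not part of the original file): a made-up round trip through the decoders. ---
if __name__ == "__main__":
    print(decode_ast('[get_weather(city="SF", days=3)]', language="Python"))
    # -> [{'get_weather': {'city': 'SF', 'days': 3}}]
    print(decode_execute('[get_weather(city="SF", days=3)]'))
    # -> ["get_weather(city='SF',days=3)"]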


@ -0,0 +1,989 @@
# ruff: noqa
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import json
import re
import time
from typing import Any
# Comment out for now until we actually use the rest checker in evals
# import requests # Do not remove this import even though it seems to be unused. It's used in the executable_checker_rest function.
class NoAPIKeyError(Exception):
def __init__(self):
self.message = "Please fill in the API keys in the function_credential_config.json file. If you do not provide the API keys, the executable test category results will be inaccurate."
super().__init__(self.message)
REAL_TIME_MATCH_ALLOWED_DIFFERENCE = 0.2
JAVA_TYPE_CONVERSION = {
"byte": int,
"short": int,
"integer": int,
"float": float,
"double": float,
"long": int,
"boolean": bool,
"char": str,
"Array": list,
"ArrayList": list,
"Set": set,
"HashMap": dict,
"Hashtable": dict,
"Queue": list, # this can be `queue.Queue` as well, for simplicity we check with list
"Stack": list,
"String": str,
"any": str,
}
JS_TYPE_CONVERSION = {
"String": str,
"integer": int,
"float": float,
"Bigint": int,
"Boolean": bool,
"dict": dict,
"array": list,
"any": str,
}
# We switch to conditional import for the following two imports to avoid unnecessary installations.
# Users don't need to set up the tree-sitter packages if they are not running the tests for that language.
# from js_type_converter import js_type_converter
# from java_type_converter import java_type_converter
PYTHON_TYPE_MAPPING = {
"string": str,
"integer": int,
"float": float,
"boolean": bool,
"array": list,
"tuple": list,
"dict": dict,
"any": str,
}
# This is the list of types that we need to recursively check its values
PYTHON_NESTED_TYPE_CHECK_LIST = ["array", "tuple"]
NESTED_CONVERSION_TYPE_LIST = ["Array", "ArrayList", "array"]
#### Helper functions for AST ####
def find_description(func_descriptions, name):
if type(func_descriptions) == list:
for func_description in func_descriptions:
if func_description["name"] == name:
return func_description
return None
else:
# it is a dict, there is only one function
return func_descriptions
def get_possible_answer_type(possible_answer: list):
for answer in possible_answer:
if answer != "": # Optional parameter
return type(answer)
return None
def type_checker(
param: str,
value,
possible_answer: list,
expected_type_description: str,
expected_type_converted,
nested_type_converted,
):
# NOTE: This type checker only supports nested type checking for one level deep.
# We didn't implement recursive type checking for nested types, as it's not needed for the current use case and it's very complex.
result: Any = {
"valid": True,
"error": [],
"is_variable": False,
"error_type": "type_error:simple",
}
is_variable = False
# check for the case where a variable is used instead of an actual value.
# use the type in possible_answer as the expected type
possible_answer_type = get_possible_answer_type(possible_answer)
# if possible_answer only contains optional parameters, we can't determine the type
if possible_answer_type != None:
# we are being precise here.
# in fact, possible_answer_type should always be a string, as that's how we treat variables in possible_answer
if possible_answer_type != expected_type_converted:
is_variable = True
# value is the same type as in function description
if type(value) == expected_type_converted:
# We don't need to do recursive check for simple types
if nested_type_converted == None:
result["is_variable"] = is_variable
return result
else:
for possible_answer_item in possible_answer:
flag = True # Each parameter should match at least one possible answer type.
# Here, we assume that each item should be the same type. We could also relax it.
if type(possible_answer_item) == list:
for value_item in value:
checker_result = type_checker(
param,
value_item,
possible_answer_item,
str(nested_type_converted),
nested_type_converted,
None,
)
if not checker_result["valid"]:
flag = False
break
if flag:
return {"valid": True, "error": [], "is_variable": is_variable}
result["valid"] = False
result["error"] = [
f"Nested type checking failed for parameter {repr(param)}. Expected outer type {expected_type_description} with inner type {str(nested_type_converted)}. Parameter value: {repr(value)}."
]
result["error_type"] = "type_error:nested"
# value is not as expected, check for the case where a variable is used instead of an actual value
# use the type in possible_answer as the expected type
possible_answer_type = get_possible_answer_type(possible_answer)
# if possible_answer only contains optional parameters, we can't determine the type
if possible_answer_type != None:
# we are being precise here.
# in fact, possible_answer_type should always be a string, as that's how we treat variables in possible_answer
if type(value) == possible_answer_type:
result["is_variable"] = True
return result
result["valid"] = False
result["error"].append(
f"Incorrect type for parameter {repr(param)}. Expected type {expected_type_description}, got {type(value).__name__}. Parameter value: {repr(value)}."
)
result["error_type"] = "type_error:simple"
return result
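# Editor's sketch: with a concrete (hypothetical) parameter, the checker above behaves like
#     type_checker("days", 3, [3, 5], "integer", int, None)
#     -> {"valid": True, "error": [], "is_variable": False, "error_type": "type_error:simple"}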
def standardize_string(input_string: str):
# This function standardizes the string by removing all the spaces, ",./-_*^" punctuation, and converting it to lowercase
# It will also convert all the single quotes to double quotes
# This is used to compare the model output with the possible answers
# We don't want to punish the model for answers like April 1, 2024 vs April 1,2024 vs April 1 2024
regex_string = r"[ \,\.\/\-\_\*\^]"
return re.sub(regex_string, "", input_string).lower().replace("'", '"')
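# Editor's sketch: the standardization above makes the string comparisons tolerant of spacing and
# punctuation, e.g. standardize_string("April 1, 2024") == standardize_string("April 1,2024") == "april12024".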
def string_checker(param: str, model_output: str, possible_answer: list):
standardize_possible_answer = []
standardize_model_output = standardize_string(model_output)
for i in range(len(possible_answer)):
if type(possible_answer[i]) == str:
standardize_possible_answer.append(standardize_string(possible_answer[i]))
if standardize_model_output not in standardize_possible_answer:
return {
"valid": False,
"error": [
f"Invalid value for parameter {repr(param)}: {repr(model_output)}. Expected one of {possible_answer}. Case insensitive."
],
"error_type": "value_error:string",
}
return {"valid": True, "error": []}
def list_checker(param: str, model_output: list, possible_answer: list):
# Convert the tuple to a list
standardize_model_output = list(model_output)
# If the element in the list is a string, we need to standardize it
for i in range(len(standardize_model_output)):
if type(standardize_model_output[i]) == str:
standardize_model_output[i] = standardize_string(model_output[i])
standardize_possible_answer: Any = []
# We also need to standardize the possible answers
for i in range(len(possible_answer)):
standardize_possible_answer.append([])
for j in range(len(possible_answer[i])):
if type(possible_answer[i][j]) == str:
standardize_possible_answer[i].append(standardize_string(possible_answer[i][j]))
else:
standardize_possible_answer[i].append(possible_answer[i][j])
if standardize_model_output not in standardize_possible_answer:
return {
"valid": False,
"error": [
f"Invalid value for parameter {repr(param)}: {repr(model_output)}. Expected one of {possible_answer}."
],
"error_type": "value_error:list/tuple",
}
return {"valid": True, "error": []}
def dict_checker(param: str, model_output: dict, possible_answers: list):
# This function works for simple dictionaries, but not dictionaries with nested dictionaries.
# The current dataset only contains simple dictionaries, so this is sufficient.
result = {"valid": False, "error": [], "error_type": "dict_checker:unclear"}
for i in range(len(possible_answers)):
if possible_answers[i] == "":
continue
result = {"valid": False, "error": [], "error_type": "dict_checker:unclear"}
flag = True
possible_answer = possible_answers[i]
# possible_answer is a single dictionary
for key, value in model_output.items():
if key not in possible_answer:
result["valid"] = False
result["error"].append(f"Unexpected dict key parameter: '{key}'.") # type: ignore[attr-defined]
result["error_type"] = "value_error:dict_key"
flag = False
break
standardize_value = value
# If the value is a string, we need to standardize it
if type(value) == str:
standardize_value = standardize_string(value)
# We also need to standardize the possible answers if they are string
standardize_possible_answer = []
for i in range(len(possible_answer[key])):
if type(possible_answer[key][i]) == str:
standardize_possible_answer.append(standardize_string(possible_answer[key][i]))
else:
standardize_possible_answer.append(possible_answer[key][i])
if standardize_value not in standardize_possible_answer:
result["valid"] = False
result["error"].append( # type: ignore[attr-defined]
f"Invalid value for parameter {repr(key)}: {repr(value)}. Expected one of {standardize_possible_answer}."
)
result["error_type"] = "value_error:dict_value"
flag = False
break
for key, value in possible_answer.items():
if key not in model_output and "" not in value:
result["valid"] = False
result["error"].append(f"Missing dict key parameter: '{key}'.") # type: ignore[attr-defined]
result["error_type"] = "value_error:dict_key"
flag = False
break
if flag:
return {"valid": True, "error": []}
return result
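# Editor's sketch (hypothetical values): a dictionary parameter passes as long as every key/value
# appears in one of the possible answers, e.g.
#     dict_checker("format", {"unit": "celsius"}, [{"unit": ["celsius", "fahrenheit"]}])
#     -> {"valid": True, "error": []}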
def list_dict_checker(param: str, model_output: list, possible_answers: list):
# This function takes in a list of dictionaries and checks if each dictionary is valid
# The order of the dictionaries in the list must match the order of the possible answers
result = {"valid": False, "error": [], "error_type": "list_dict_checker:unclear"}
for answer_index in range(len(possible_answers)):
flag = True # True means so far, all dictionaries are valid
# Only proceed if the number of dictionaries in the list matches the number of dictionaries in the possible answers
if len(model_output) != len(possible_answers[answer_index]):
result["valid"] = False
result["error"] = ["Wrong number of dictionaries in the list."]
result["error_type"] = "value_error:list_dict_count"
flag = False
continue
for dict_index in range(len(model_output)):
result = dict_checker(
param,
model_output[dict_index],
[possible_answers[answer_index][dict_index]],
)
if not result["valid"]:
flag = False
break
if flag:
return {"valid": True, "error": []}
return result
def simple_function_checker(
func_description: dict,
model_output: dict,
possible_answer: dict,
language: str,
model_name: str,
):
possible_answer = list(possible_answer.values())[0]
# Extract function name and parameters details
func_name = func_description["name"]
param_details = func_description["parameters"]["properties"]
required_params = func_description["parameters"]["required"]
# Initialize a result dictionary
result = {
"valid": True,
"error": [],
"error_type": "simple_function_checker:unclear",
}
# Check if function name matches
if func_name not in model_output:
result["valid"] = False
result["error"].append( # type: ignore[attr-defined]
f"Function name {repr(func_name)} not found in model output."
)
result["error_type"] = "simple_function_checker:wrong_func_name"
return result
model_params = model_output[func_name]
# Check for required parameters in model output
for param in required_params:
if param not in model_params:
result["valid"] = False
result["error"].append(f"Missing required parameter: {repr(param)}.") # type: ignore[attr-defined]
result["error_type"] = "simple_function_checker:missing_required"
return result
# Validate types and values for each parameter in model output
for param, value in model_params.items():
if param not in param_details or param not in possible_answer:
result["valid"] = False
result["error"].append(f"Unexpected parameter: {repr(param)}.") # type: ignore[attr-defined]
result["error_type"] = "simple_function_checker:unexpected_param"
return result
full_param_details = param_details[param]
expected_type_description = full_param_details["type"] # This is a string
is_variable = False
nested_type_converted = None
if language == "Java":
from evals.utils.bfcl.java_type_converter import java_type_converter
expected_type_converted = JAVA_TYPE_CONVERSION[expected_type_description]
if expected_type_description in JAVA_TYPE_CONVERSION:
if type(value) != str:
result["valid"] = False
result["error"].append( # type: ignore[attr-defined]
f"Incorrect type for parameter {repr(param)}. Expected type String, got {type(value).__name__}. Parameter value: {repr(value)}."
)
result["error_type"] = "type_error:java"
return result
if expected_type_description in NESTED_CONVERSION_TYPE_LIST:
nested_type = param_details[param]["items"]["type"]
nested_type_converted = JAVA_TYPE_CONVERSION[nested_type]
value = java_type_converter(value, expected_type_description, nested_type)
else:
value = java_type_converter(value, expected_type_description)
elif language == "JavaScript":
from evals.utils.bfcl.js_type_converter import js_type_converter
expected_type_converted = JS_TYPE_CONVERSION[expected_type_description]
if expected_type_description in JS_TYPE_CONVERSION:
if type(value) != str:
result["valid"] = False
result["error"].append( # type: ignore[attr-defined]
f"Incorrect type for parameter {repr(param)}. Expected type String, got {type(value).__name__}. Parameter value: {repr(value)}."
)
result["error_type"] = "type_error:js"
return result
if expected_type_description in NESTED_CONVERSION_TYPE_LIST:
nested_type = param_details[param]["items"]["type"]
nested_type_converted = JS_TYPE_CONVERSION[nested_type]
value = js_type_converter(value, expected_type_description, nested_type)
else:
value = js_type_converter(value, expected_type_description)
elif language == "Python":
expected_type_converted = PYTHON_TYPE_MAPPING[expected_type_description]
if expected_type_description in PYTHON_NESTED_TYPE_CHECK_LIST:
nested_type = param_details[param]["items"]["type"]
nested_type_converted = PYTHON_TYPE_MAPPING[nested_type]
# We convert all tuple value to list when the expected type is tuple.
# The conversion is necessary because any tuple in the possible answer would become a list after being processed through json.dump() and json.load().
# This does introduce some false positive (eg, when the model provides a list value instead of tuple). We hope to find a better solution in the future.
if expected_type_description == "tuple" and type(value) == tuple:
value = list(value)
# Allow python auto conversion from int to float
if language == "Python" and expected_type_description == "float" and type(value) == int:
value = float(value)
# Type checking
# In fact, we only check for Python here.
# Type check for other languages are handled by the type converter, and so their value (after conversion) is always correct.
type_check_result = type_checker(
param,
value,
possible_answer[param],
expected_type_description,
expected_type_converted,
nested_type_converted,
)
is_variable = type_check_result["is_variable"]
if not type_check_result["valid"]:
return type_check_result
# It doesn't make sense to special handle dictionaries and list of dictionaries if the value is a variable.
# We can just treat the variable as a string and use the normal flow.
if not is_variable:
# Special handle for dictionaries
if expected_type_converted == dict:
result = dict_checker(param, value, possible_answer[param])
if not result["valid"]:
return result
continue
# Special handle for list of dictionaries
elif expected_type_converted == list and nested_type_converted == dict:
result = list_dict_checker(param, value, possible_answer[param])
if not result["valid"]:
return result
continue
# Special handle for strings
elif expected_type_converted == str:
# We don't check for case sensitivity for string, as long as it's not a variable
result = string_checker(param, value, possible_answer[param])
if not result["valid"]:
return result
continue
elif expected_type_converted == list:
result = list_checker(param, value, possible_answer[param])
if not result["valid"]:
return result
continue
# Check if the value is within the possible answers
if value not in possible_answer[param]:
result["valid"] = False
result["error"].append( # type: ignore[attr-defined]
f"Invalid value for parameter {repr(param)}: {repr(value)}. Expected one of {possible_answer[param]}."
)
result["error_type"] = "value_error:others"
return result
# Check for optional parameters not provided but allowed
for param in possible_answer:
if param not in model_params and "" not in possible_answer[param]:
result["valid"] = False
result["error"].append( # type: ignore[attr-defined]
f"Optional parameter {repr(param)} not provided and not marked as optional."
)
result["error_type"] = "simple_function_checker:missing_optional"
return result
return result
def parallel_function_checker_enforce_order(
func_descriptions: list,
model_output: list,
possible_answers: dict,
language: str,
model_name: str,
):
if len(model_output) != len(possible_answers):
return {
"valid": False,
"error": ["Wrong number of functions."],
"error_type": "parallel_function_checker_enforce_order:wrong_count",
}
func_name_list = list(possible_answers.keys())
possible_answers_list = []
for key, value in possible_answers.items():
possible_answers_list.append({key: value})
for i in range(len(possible_answers_list)):
func_description = find_description(func_descriptions, func_name_list[i])
result = simple_function_checker(
func_description,
model_output[i],
possible_answers_list[i],
language,
model_name,
)
if not result["valid"]:
return result
return {"valid": True, "error": []}
def parallel_function_checker_no_order(
func_descriptions: list,
model_output: list,
possible_answers: list,
language: str,
model_name: str,
):
if len(model_output) != len(possible_answers):
return {
"valid": False,
"error": ["Wrong number of functions."],
"error_type": "parallel_function_checker_no_order:wrong_count",
}
matched_indices = []
# We go through the possible answers one by one, and eliminate the model output that matches the possible answer
# It must be this way because we need ground truth to fetch the correct function description
for i in range(len(possible_answers)):
# possible_answers[i] is a dictionary with only one key
func_name_expected = list(possible_answers[i].keys())[0]
func_description = find_description(func_descriptions, func_name_expected)
all_errors = []
for index in range(len(model_output)):
if index in matched_indices:
continue
result = simple_function_checker(
func_description,
model_output[index],
possible_answers[i],
language,
model_name,
)
if result["valid"]:
matched_indices.append(index)
break
else:
all_errors.append(
{
f"Model Result Index {index}": {
"sub_error": result["error"],
"sub_error_type": result["error_type"],
"model_output_item": model_output[index],
"possible_answer_item": possible_answers[i],
}
}
)
if not result["valid"]:
considered_indices = [i for i in range(len(model_output)) if i not in matched_indices]
all_errors.insert(
0,
f"Could not find a matching function among index {considered_indices} of model output for index {i} of possible answers.", # type: ignore[arg-type]
)
return {
"valid": False,
"error": all_errors,
"error_type": "parallel_function_checker_no_order:cannot_find_match",
}
return {"valid": True, "error": []}
def multiple_function_checker(
func_descriptions: list,
model_output: list,
possible_answers: list,
language: str,
model_name: str,
):
if len(model_output) != len(possible_answers):
return {
"valid": False,
"error": ["Wrong number of functions."],
"error_type": "multiple_function_checker:wrong_count",
}
# possible_answers is a list of only one dictionary with only one key
func_name_expected = list(possible_answers[0].keys())[0]
func_description = find_description(func_descriptions, func_name_expected)
return simple_function_checker(
func_description,
model_output[0],
possible_answers[0],
language,
model_name,
)
def patten_matcher(exec_output, expected_result, function_call, is_sanity_check):
result = {"valid": True, "error": [], "error_type": "executable_checker:unclear"}
if type(exec_output) != type(expected_result):
return {
"valid": False,
"error": [
f"Wrong execution result type for {repr(function_call)}. Expected type: {type(expected_result)}, but got: {type(exec_output)}."
],
"error_type": "executable_checker:wrong_result_type",
"model_executed_output": exec_output,
}
if type(exec_output) == dict:
# We loosen the requirement for the sanity check as the expected result used in the sanity check might not be the most up-to-date one.
# This happens when the key is a timestamp or a random number.
if is_sanity_check:
if len(exec_output) != len(expected_result):
return {
"valid": False,
"error": [
f"Wrong execution result pattern for {repr(function_call)}. Expect type Dict, but wrong number of elements in the output. Expected length: {len(expected_result)}, but got: {len(exec_output)}."
],
"error_type": "executable_checker:wrong_result_type:dict_length",
"model_executed_output": exec_output,
}
else:
return result
for key, value in expected_result.items():
if key not in exec_output:
return {
"valid": False,
"error": [
f"Wrong execution result pattern for {repr(function_call)}. Expect type Dict, but key {repr(key)} not found in the model output."
],
"error_type": "executable_checker:wrong_result_type:dict_key_not_found",
"model_executed_output": exec_output,
}
for key, value in exec_output.items():
if key not in expected_result:
return {
"valid": False,
"error": [
f"Wrong execution result pattern for {repr(function_call)}. Expect type Dict, but key {repr(key)} not expected in the model output."
],
"error_type": "executable_checker:wrong_result_type:dict_extra_key",
"model_executed_output": exec_output,
}
if type(exec_output) == list:
if len(exec_output) != len(expected_result):
return {
"valid": False,
"error": [
f"Wrong execution result pattern for {repr(function_call)}. Expect type list, but wrong number of elements in the output. Expected length: {len(expected_result)}, but got: {len(exec_output)}."
],
"error_type": "executable_checker:wrong_result_type:list_length",
"model_executed_output": exec_output,
}
return result
#### Helper functions for Exec ####
def executable_checker_simple(
function_call: str,
expected_result,
expected_result_type: str,
is_sanity_check=False,
):
result = {"valid": True, "error": [], "error_type": "executable_checker:unclear"}
exec_dict: Any = {}
try:
exec(
"from executable_python_function import *" + "\nresult=" + function_call,
exec_dict,
)
exec_output = exec_dict["result"]
except NoAPIKeyError as e:
raise e
except Exception as e:
result["valid"] = False
result["error"].append( # type: ignore[attr-defined]
f"Error in execution: {repr(function_call)}. Error: {str(e)}"
)
result["error_type"] = "executable_checker:execution_error"
return result
# We need to special handle the case where the execution result is a tuple and convert it to a list
# Because when json is stored, the tuple is converted to a list, and so the expected result is a list when loaded from json
if isinstance(exec_output, tuple):
exec_output = list(exec_output)
if expected_result_type == "exact_match":
if exec_output != expected_result:
result["valid"] = False
result["error"].append( # type: ignore[attr-defined]
f"Wrong execution result for {repr(function_call)}. Expected: {expected_result}, but got: {exec_output}."
)
result["error_type"] = "executable_checker:wrong_result"
result["model_executed_output"] = exec_output
return result
elif expected_result_type == "real_time_match":
# Allow for a 20% relative difference (REAL_TIME_MATCH_ALLOWED_DIFFERENCE)
if (type(expected_result) == float or type(expected_result) == int) and (
type(exec_output) == float or type(exec_output) == int
):
if not (
expected_result * (1 - REAL_TIME_MATCH_ALLOWED_DIFFERENCE)
<= exec_output
<= expected_result * (1 + REAL_TIME_MATCH_ALLOWED_DIFFERENCE)
):
result["valid"] = False
result["error"].append( # type: ignore[attr-defined]
f"Wrong execution result for {repr(function_call)}. Expected: {expected_result}, but got: {exec_output}. {REAL_TIME_MATCH_ALLOWED_DIFFERENCE * 100}% difference allowed."
)
result["error_type"] = "executable_checker:wrong_result_real_time"
result["model_executed_output"] = exec_output
return result
else:
result["valid"] = False
result["error"].append( # type: ignore[attr-defined]
f"Wrong execution result for {repr(function_call)}. Expected: {expected_result}, but got: {exec_output}. Type needs to be float or int for real time match criteria."
)
result["error_type"] = "executable_checker:wrong_result_real_time"
result["model_executed_output"] = exec_output
return result
else:
# structural match
pattern_match_result = patten_matcher(exec_output, expected_result, function_call, is_sanity_check)
if not pattern_match_result["valid"]:
return pattern_match_result
return result
def executable_checker_parallel_no_order(
decoded_result: list, expected_exec_result: list, expected_exec_result_type: list
):
if len(decoded_result) != len(expected_exec_result):
return {
"valid": False,
"error": [
f"Wrong number of functions provided. Expected {len(expected_exec_result)}, but got {len(decoded_result)}."
],
"error_type": "value_error:exec_result_count",
}
matched_indices = []
for i in range(len(expected_exec_result)):
all_errors = []
for index in range(len(decoded_result)):
if index in matched_indices:
continue
result = executable_checker_simple(
decoded_result[index],
expected_exec_result[i],
expected_exec_result_type[i],
False,
)
if result["valid"]:
matched_indices.append(index)
break
else:
all_errors.append(
{
f"Model Result Index {index}": {
"sub_error": result["error"],
"sub_error_type": result["error_type"],
"model_executed_output": (
result["model_executed_output"] if "model_executed_output" in result else None
),
}
}
)
if not result["valid"]:
considered_indices = [i for i in range(len(decoded_result)) if i not in matched_indices]
all_errors.insert(
0,
f"Could not find a matching function among index {considered_indices} of model output for index {i} of possible answers.", # type: ignore[arg-type]
)
return {
"valid": False,
"error": all_errors,
"error_type": "executable_checker:cannot_find_match",
}
return {"valid": True, "error": [], "error_type": "executable_checker:unclear"}
#### Main function ####
def executable_checker_rest(func_call, idx):
# Move this here for now to avoid needing to read this file / fix paths to be relative to dataset_dir. Fix when it's actually needed / used.
EVAL_GROUND_TRUTH_PATH = "/mnt/wsfuse/fair_llm_v2/datasets/eval/bfcl/rest-eval-response_v5.jsonl" # Ground truth file for v5 for rest execution
with open(EVAL_GROUND_TRUTH_PATH, "r") as f:
EVAL_GROUND_TRUTH = f.readlines()
if "https://geocode.maps.co" in func_call:
time.sleep(2)
if "requests_get" in func_call:
func_call = func_call.replace("requests_get", "requests.get")
try:
response = eval(func_call)
except Exception as e:
return {
"valid": False,
"error": [f"Execution failed. {str(e)}"],
"error_type": "executable_checker_rest:execution_error",
}
try:
if response.status_code == 200:
eval_GT_json = json.loads(EVAL_GROUND_TRUTH[idx])
try:
if isinstance(eval_GT_json, dict):
if isinstance(response.json(), dict):
if set(eval_GT_json.keys()) == set(response.json().keys()):
return {"valid": True, "error": [], "error_type": ""}
return {
"valid": False,
"error": ["Key inconsistency"],
"error_type": "executable_checker_rest:wrong_key",
}
return {
"valid": False,
"error": [f"Expected dictionary, but got {type(response.json())}"],
"error_type": "executable_checker_rest:wrong_type",
}
elif isinstance(eval_GT_json, list):
if isinstance(response.json(), list):
if len(eval_GT_json) != len(response.json()):
return {
"valid": False,
"error": [f"Response list length inconsistency."],
"error_type": "value_error:exec_result_rest_count",
}
else:
for i in range(len(eval_GT_json)):
if set(eval_GT_json[i].keys()) != set(response.json()[i].keys()):
return {
"valid": False,
"error": [f"Key inconsistency"],
"error_type": "executable_checker_rest:wrong_key",
}
return {"valid": True, "error": []}
else:
return {
"valid": False,
"error": [f"Expected list, but got {type(response.json())}"],
"error_type": "executable_checker_rest:wrong_type",
}
return {
"valid": False,
"error": [f"Expected dict or list, but got {type(response.json())}"],
"error_type": "executable_checker_rest:wrong_type",
}
except Exception as e:
return {
"valid": False,
"error": [
f"Error in execution and type checking. Status code: {response.status_code}. Error: {str(e)}"
],
"error_type": "executable_checker_rest:response_format_error",
}
else:
return {
"valid": False,
"error": [f"Execution result status code is not 200, got {response.status_code}"],
"error_type": "executable_checker_rest:wrong_status_code",
}
except Exception as e:
return {
"valid": False,
"error": [f"Cannot get status code of the response. Error: {str(e)}"],
"error_type": "executable_checker_rest:cannot_get_status_code",
}
def ast_checker(func_description, model_output, possible_answer, language, test_category, model_name):
if "parallel" in test_category:
return parallel_function_checker_no_order(func_description, model_output, possible_answer, language, model_name)
elif "multiple" in test_category:
return multiple_function_checker(func_description, model_output, possible_answer, language, model_name)
else:
if len(model_output) != 1:
return {
"valid": False,
"error": ["Wrong number of functions."],
"error_type": "simple_function_checker:wrong_count",
}
return simple_function_checker(
func_description[0],
model_output[0],
possible_answer[0],
language,
model_name,
)
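# Editor's sketch (hypothetical data): for a "simple" test category the dispatch above reduces to a
# single simple_function_checker call, e.g.
#     ast_checker(
#         [{"name": "get_weather", "parameters": {"properties": {"city": {"type": "string"}}, "required": ["city"]}}],
#         [{"get_weather": {"city": "SF"}}],
#         [{"get_weather": {"city": ["SF", "San Francisco"]}}],
#         language="Python", test_category="simple", model_name="",
#     )
#     -> {"valid": True, "error": [], "error_type": "simple_function_checker:unclear"}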
def exec_checker(decoded_result: list, func_description: dict, test_category: str):
if "multiple" in test_category or "parallel" in test_category:
return executable_checker_parallel_no_order(
decoded_result,
func_description["execution_result"],
func_description["execution_result_type"],
)
else:
if len(decoded_result) != 1:
return {
"valid": False,
"error": ["Wrong number of functions."],
"error_type": "simple_exec_checker:wrong_count",
}
return executable_checker_simple(
decoded_result[0],
func_description["execution_result"][0],
func_description["execution_result_type"][0],
False,
)
def is_empty_output(decoded_output):
# This function is a patch to the ast decoder for relevance detection
# Sometimes the ast decoder will parse successfully, but the input doesn't really have a function call
# [], [{}], and anything that is not in function calling format is considered empty (and thus should be marked as correct)
if not is_function_calling_format_output(decoded_output):
return True
if len(decoded_output) == 0:
return True
if len(decoded_output) == 1 and len(decoded_output[0]) == 0:
return True
def is_function_calling_format_output(decoded_output):
# Ensure the output is a list of dictionaries
if type(decoded_output) == list:
for item in decoded_output:
if type(item) != dict:
return False
return True
return False


@ -0,0 +1,40 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
"""
Tree-sitter changes its API with unfortunate frequency. Modules that need it should
import it from here so that we can centrally manage things as necessary.
"""
# These currently work with tree-sitter 0.23.0
# NOTE: Don't import tree-sitter or any of the language modules in the main module
# because not all environments have them. Import lazily inside functions where needed.
import importlib
import typing
if typing.TYPE_CHECKING:
import tree_sitter
def get_language(language: str) -> "tree_sitter.Language":
import tree_sitter
language_module_name = f"tree_sitter_{language}"
try:
language_module = importlib.import_module(language_module_name)
except ModuleNotFoundError as exc:
raise ValueError(
f"Language {language} is not found. Please install the tree-sitter-{language} package."
) from exc
return tree_sitter.Language(language_module.language())
def get_parser(language: str, **kwargs) -> "tree_sitter.Parser":
import tree_sitter
lang = get_language(language)
return tree_sitter.Parser(lang, **kwargs)
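# Editor's sketch: assuming the `tree-sitter` and `tree-sitter-java` packages are installed, the
# helpers above are used by the BFCL AST parser roughly like this:
#
#     parser = get_parser("java")
#     tree = parser.parse(bytes('foo.bar("x");', "utf8"))
#     print(tree.root_node.type)  # "program"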


@ -5,7 +5,7 @@
# the root directory of this source tree.
import json
- from datetime import datetime
+ from datetime import datetime, timezone
from opentelemetry.sdk.trace import ReadableSpan
from opentelemetry.sdk.trace.export import SpanProcessor
@ -34,7 +34,7 @@ class ConsoleSpanProcessor(SpanProcessor):
if span.attributes and span.attributes.get("__autotraced__"):
return
- timestamp = datetime.utcfromtimestamp(span.start_time / 1e9).strftime("%H:%M:%S.%f")[:-3]
+ timestamp = datetime.fromtimestamp(span.start_time / 1e9, tz=timezone.utc).strftime("%H:%M:%S.%f")[:-3]
print(
f"{COLORS['dim']}{timestamp}{COLORS['reset']} "
@ -46,7 +46,7 @@ class ConsoleSpanProcessor(SpanProcessor):
if span.attributes and span.attributes.get("__autotraced__"):
return
- timestamp = datetime.utcfromtimestamp(span.end_time / 1e9).strftime("%H:%M:%S.%f")[:-3]
+ timestamp = datetime.fromtimestamp(span.end_time / 1e9, tz=timezone.utc).strftime("%H:%M:%S.%f")[:-3]
span_context = (
f"{COLORS['dim']}{timestamp}{COLORS['reset']} "
@ -74,7 +74,7 @@ class ConsoleSpanProcessor(SpanProcessor):
print(f" {COLORS['dim']}{key}: {str_value}{COLORS['reset']}")
for event in span.events:
- event_time = datetime.utcfromtimestamp(event.timestamp / 1e9).strftime("%H:%M:%S.%f")[:-3]
+ event_time = datetime.fromtimestamp(event.timestamp / 1e9, tz=timezone.utc).strftime("%H:%M:%S.%f")[:-3]
severity = event.attributes.get("severity", "info")
message = event.attributes.get("message", event.name)


@ -8,7 +8,7 @@ import json
import os
import sqlite3
import threading
- from datetime import datetime
+ from datetime import datetime, timezone
from opentelemetry.sdk.trace import SpanProcessor
from opentelemetry.trace import Span
@ -124,8 +124,8 @@ class SQLiteSpanProcessor(SpanProcessor):
trace_id,
service_name,
(span_id if not parent_span_id else None),
- datetime.fromtimestamp(span.start_time / 1e9).isoformat(),
- datetime.fromtimestamp(span.end_time / 1e9).isoformat(),
+ datetime.fromtimestamp(span.start_time / 1e9, timezone.utc).isoformat(),
+ datetime.fromtimestamp(span.end_time / 1e9, timezone.utc).isoformat(),
),
)
@ -143,8 +143,8 @@ class SQLiteSpanProcessor(SpanProcessor):
trace_id,
parent_span_id,
span.name,
- datetime.fromtimestamp(span.start_time / 1e9).isoformat(),
- datetime.fromtimestamp(span.end_time / 1e9).isoformat(),
+ datetime.fromtimestamp(span.start_time / 1e9, timezone.utc).isoformat(),
+ datetime.fromtimestamp(span.end_time / 1e9, timezone.utc).isoformat(),
json.dumps(dict(span.attributes)),
span.status.status_code.name,
span.kind.name,
@ -161,7 +161,7 @@ class SQLiteSpanProcessor(SpanProcessor):
(
span_id,
event.name,
- datetime.fromtimestamp(event.timestamp / 1e9).isoformat(),
+ datetime.fromtimestamp(event.timestamp / 1e9, timezone.utc).isoformat(),
json.dumps(dict(event.attributes)),
),
)


@ -168,7 +168,7 @@ def process_matplotlib_response(response, matplotlib_dump_dir: str):
image_paths = []
for i, img in enumerate(images):
# create new directory for each day to better organize data:
- dump_dname = datetime.today().strftime("%Y-%m-%d")
+ dump_dname = datetime.today().strftime("%Y-%m-%d")  # noqa: DTZ002 - we don't care about timezones here since we are displaying the date
dump_dpath = Path(matplotlib_dump_dir, dump_dname)
dump_dpath.mkdir(parents=True, exist_ok=True)
# save image into a file


@ -14,7 +14,7 @@ def available_providers() -> List[ProviderSpec]:
InlineProviderSpec(
api=Api.eval,
provider_type="inline::meta-reference",
- pip_packages=[],
+ pip_packages=["tree_sitter"],
module="llama_stack.providers.inline.eval.meta_reference",
config_class="llama_stack.providers.inline.eval.meta_reference.MetaReferenceEvalConfig",
api_dependencies=[


@ -102,3 +102,4 @@ def pytest_generate_tests(metafunc):
get_provider_fixture_overrides(metafunc.config, available_fixtures) or DEFAULT_PROVIDER_COMBINATIONS
)
metafunc.parametrize("safety_stack", combinations, indirect=True)


@ -23,6 +23,10 @@ class ColumnName(Enum):
generated_answer = "generated_answer"
context = "context"
dialog = "dialog"
function = "function"
language = "language"
id = "id"
ground_truth = "ground_truth"
VALID_SCHEMAS_FOR_SCORING = [
@ -37,6 +41,15 @@ VALID_SCHEMAS_FOR_SCORING = [
ColumnName.generated_answer.value: StringType(), ColumnName.generated_answer.value: StringType(),
ColumnName.context.value: StringType(), ColumnName.context.value: StringType(),
}, },
{
ColumnName.input_query.value: StringType(),
ColumnName.expected_answer.value: StringType(),
ColumnName.generated_answer.value: StringType(),
ColumnName.function.value: StringType(),
ColumnName.language.value: StringType(),
ColumnName.id.value: StringType(),
ColumnName.ground_truth.value: StringType(),
},
]
VALID_SCHEMAS_FOR_EVAL = [
@ -50,6 +63,15 @@ VALID_SCHEMAS_FOR_EVAL = [
ColumnName.expected_answer.value: StringType(), ColumnName.expected_answer.value: StringType(),
ColumnName.completion_input.value: CompletionInputType(), ColumnName.completion_input.value: CompletionInputType(),
}, },
{
ColumnName.input_query.value: StringType(),
ColumnName.expected_answer.value: StringType(),
ColumnName.generated_answer.value: StringType(),
ColumnName.function.value: StringType(),
ColumnName.language.value: StringType(),
ColumnName.id.value: StringType(),
ColumnName.ground_truth.value: StringType(),
},
]


@ -11,7 +11,7 @@ import logging
import queue
import threading
import uuid
- from datetime import datetime
+ from datetime import datetime, timezone
from functools import wraps
from typing import Any, Callable, Dict, List, Optional
@ -86,7 +86,7 @@ class TraceContext:
span_id=generate_short_uuid(),
trace_id=self.trace_id,
name=name,
- start_time=datetime.now(),
+ start_time=datetime.now(timezone.utc),
parent_span_id=current_span.span_id if current_span else None,
attributes=attributes,
)
@ -203,7 +203,7 @@ class TelemetryHandler(logging.Handler):
UnstructuredLogEvent(
trace_id=span.trace_id,
span_id=span.span_id,
- timestamp=datetime.now(),
+ timestamp=datetime.now(timezone.utc),
message=self.format(record),
severity=severity(record.levelname),
)


@ -26,11 +26,18 @@ providers:
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/faiss_store.db
safety:
<<<<<<< HEAD
- provider_id: nvidia
provider_type: remote::nvidia
config:
guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:http://localhost:7331}
config_id: self-check
=======
- provider_id: llama-guard
provider_type: inline::llama-guard
config:
excluded_categories: []
>>>>>>> upstream/main
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
@ -55,6 +62,16 @@ providers:
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/meta_reference_eval.db
datasetio:
<<<<<<< HEAD
=======
- provider_id: huggingface
provider_type: remote::huggingface
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/huggingface_datasetio.db
>>>>>>> upstream/main
- provider_id: localfs
provider_type: inline::localfs
config:


@ -3,5 +3,8 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
<<<<<<<< HEAD:llama_stack/templates/open-benchmark/__init__.py
from .open_benchmark import get_distribution_template # noqa: F401
========
>>>>>>>> upstream/main:llama_stack/providers/inline/scoring/basic/utils/bfcl/__init__.py


@ -226,6 +226,25 @@ def get_distribution_template() -> DistributionTemplate:
"chat_completion_input": {"type": "string"},
},
),
<<<<<<< HEAD
=======
DatasetInput(
dataset_id="bfcl",
provider_id="huggingface",
url=URL(uri="https://huggingface.co/datasets/llamastack/bfcl_v3"),
metadata={
"path": "llamastack/bfcl_v3",
"split": "train",
},
dataset_schema={
"function": {"type": "string"},
"language": {"type": "string"},
"ground_truth": {"type": "string"},
"id": {"type": "string"},
"chat_completion_input": {"type": "string"},
},
),
>>>>>>> upstream/main
]
default_benchmarks = [
@ -249,6 +268,14 @@ def get_distribution_template() -> DistributionTemplate:
dataset_id="math_500",
scoring_functions=["basic::regex_parser_math_response"],
),
<<<<<<< HEAD
=======
BenchmarkInput(
benchmark_id="meta-reference-bfcl",
dataset_id="bfcl",
scoring_functions=["basic::bfcl"],
),
>>>>>>> upstream/main
]
return DistributionTemplate(
name=name,


@ -216,6 +216,27 @@ datasets:
split: test
dataset_id: math_500
provider_id: huggingface
<<<<<<< HEAD
=======
- dataset_schema:
function:
type: string
language:
type: string
ground_truth:
type: string
id:
type: string
chat_completion_input:
type: string
url:
uri: https://huggingface.co/datasets/llamastack/bfcl_v3
metadata:
path: llamastack/bfcl_v3
split: train
dataset_id: bfcl
provider_id: huggingface
>>>>>>> upstream/main
scoring_fns: []
benchmarks:
- dataset_id: simpleqa
@ -238,6 +259,14 @@ benchmarks:
- basic::regex_parser_math_response
metadata: {}
benchmark_id: meta-reference-math-500
<<<<<<< HEAD
=======
- dataset_id: bfcl
scoring_functions:
- basic::bfcl
metadata: {}
benchmark_id: meta-reference-bfcl
>>>>>>> upstream/main
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search


@ -0,0 +1,7 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from .passthrough import get_distribution_template # noqa: F401


@ -1,9 +1,10 @@
version: '2'
distribution_spec:
- description: Use for running LLM inference with the endpoint that compatible with Llama Stack API
+ description: Use Passthrough hosted llama-stack endpoint for LLM inference
providers:
inference:
- remote::passthrough
- inline::sentence-transformers
vector_io:
- inline::faiss
- remote::chromadb
@ -26,6 +27,7 @@ distribution_spec:
tool_runtime:
- remote::brave-search
- remote::tavily-search
- remote::wolfram-alpha
    - inline::code-interpreter
    - inline::rag-runtime
    - remote::model-context-protocol

View file

@ -0,0 +1,35 @@
---
orphan: true
---
# Passthrough Distribution
```{toctree}
:maxdepth: 2
:hidden:
self
```
The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations.
{{ providers_table }}
{% if run_config_env_vars %}
### Environment Variables
The following environment variables can be configured:
{% for var, (default_value, description) in run_config_env_vars.items() %}
- `{{ var }}`: {{ description }} (default: `{{ default_value }}`)
{% endfor %}
{% endif %}
{% if default_models %}
### Models
The following models are available by default:
{% for model in default_models %}
- `{{ model.model_id }} {{ model.doc_string }}`
{% endfor %}
{% endif %}

View file

@ -0,0 +1,201 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from pathlib import Path
from llama_stack.apis.models.models import ModelType
from llama_stack.distribution.datatypes import (
ModelInput,
Provider,
ShieldInput,
ToolGroupInput,
)
from llama_stack.providers.inline.inference.sentence_transformers import (
SentenceTransformersInferenceConfig,
)
from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
from llama_stack.providers.remote.inference.passthrough.config import (
PassthroughImplConfig,
)
from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
from llama_stack.templates.template import (
DistributionTemplate,
RunConfigSettings,
)
def get_distribution_template() -> DistributionTemplate:
providers = {
"inference": ["remote::passthrough", "inline::sentence-transformers"],
"vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
"safety": ["inline::llama-guard"],
"agents": ["inline::meta-reference"],
"telemetry": ["inline::meta-reference"],
"eval": ["inline::meta-reference"],
"datasetio": ["remote::huggingface", "inline::localfs"],
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
"tool_runtime": [
"remote::brave-search",
"remote::tavily-search",
"remote::wolfram-alpha",
"inline::code-interpreter",
"inline::rag-runtime",
"remote::model-context-protocol",
],
}
name = "passthrough"
inference_provider = Provider(
provider_id="passthrough",
provider_type="remote::passthrough",
config=PassthroughImplConfig.sample_run_config(),
)
embedding_provider = Provider(
provider_id="sentence-transformers",
provider_type="inline::sentence-transformers",
config=SentenceTransformersInferenceConfig.sample_run_config(),
)
vector_io_provider = Provider(
provider_id="faiss",
provider_type="inline::faiss",
config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
)
default_models = [
ModelInput(
metadata={},
model_id="meta-llama/Llama-3.1-8B-Instruct",
provider_id="passthrough",
provider_model_id="llama3.1-8b-instruct",
model_type=ModelType.llm,
),
ModelInput(
metadata={},
model_id="meta-llama/Llama-3.2-11B-Vision-Instruct",
provider_id="passthrough",
provider_model_id="llama3.2-11b-vision-instruct",
model_type=ModelType.llm,
),
]
embedding_model = ModelInput(
model_id="all-MiniLM-L6-v2",
provider_id="sentence-transformers",
model_type=ModelType.embedding,
metadata={
"embedding_dimension": 384,
},
)
default_tool_groups = [
ToolGroupInput(
toolgroup_id="builtin::websearch",
provider_id="tavily-search",
),
ToolGroupInput(
toolgroup_id="builtin::wolfram_alpha",
provider_id="wolfram-alpha",
),
ToolGroupInput(
toolgroup_id="builtin::rag",
provider_id="rag-runtime",
),
ToolGroupInput(
toolgroup_id="builtin::code_interpreter",
provider_id="code-interpreter",
),
]
return DistributionTemplate(
name=name,
distro_type="self_hosted",
description="Use Passthrough hosted llama-stack endpoint for LLM inference",
container_image=None,
template_path=Path(__file__).parent / "doc_template.md",
providers=providers,
available_models_by_provider={
"passthrough": [
ProviderModelEntry(
provider_model_id="llama3.1-8b-instruct",
model_type=ModelType.llm,
),
ProviderModelEntry(
provider_model_id="llama3.2-11b-vision-instruct",
model_type=ModelType.llm,
),
],
},
run_configs={
"run.yaml": RunConfigSettings(
provider_overrides={
"inference": [inference_provider, embedding_provider],
"vector_io": [vector_io_provider],
},
default_models=default_models + [embedding_model],
default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
default_tool_groups=default_tool_groups,
),
"run-with-safety.yaml": RunConfigSettings(
provider_overrides={
"inference": [
inference_provider,
embedding_provider,
],
"vector_io": [vector_io_provider],
"safety": [
Provider(
provider_id="llama-guard",
provider_type="inline::llama-guard",
config={},
),
Provider(
provider_id="llama-guard-vision",
provider_type="inline::llama-guard",
config={},
),
Provider(
provider_id="code-scanner",
provider_type="inline::code-scanner",
config={},
),
],
},
default_models=[
*default_models,
embedding_model,
],
default_shields=[
ShieldInput(
shield_id="meta-llama/Llama-Guard-3-8B",
provider_id="llama-guard",
),
ShieldInput(
shield_id="meta-llama/Llama-Guard-3-11B-Vision",
provider_id="llama-guard-vision",
),
ShieldInput(
shield_id="CodeScanner",
provider_id="code-scanner",
),
],
default_tool_groups=default_tool_groups,
),
},
run_config_env_vars={
"LLAMA_STACK_PORT": (
"5001",
"Port for the Llama Stack distribution server",
),
"PASSTHROUGH_API_KEY": (
"",
"Passthrough API Key",
),
"PASSTHROUGH_URL": (
"",
"Passthrough URL",
),
},
)
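The passthrough template module above is consumed by the distribution build tooling, but it can also be inspected directly. The sketch below is a minimal example (assuming `DistributionTemplate` exposes the constructor arguments shown above as attributes, which is an assumption rather than something this diff confirms) of printing the providers and environment variables the template declares:

```python
# Hypothetical inspection helper; attribute names mirror the keyword arguments
# passed to DistributionTemplate in the template module above.
from llama_stack.templates.passthrough import get_distribution_template


def describe_passthrough_template() -> None:
    template = get_distribution_template()
    print(f"name: {template.name}")
    # providers was passed in as a dict mapping API name -> provider types
    for api, provider_types in template.providers.items():
        print(f"{api}: {', '.join(provider_types)}")
    # run_config_env_vars maps each variable to (default value, description)
    for var, (default, description) in template.run_config_env_vars.items():
        print(f"{var} (default: {default!r}): {description}")


if __name__ == "__main__":
    describe_passthrough_template()
```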

View file

@ -0,0 +1,154 @@
version: '2'
image_name: passthrough
apis:
- agents
- datasetio
- eval
- inference
- safety
- scoring
- telemetry
- tool_runtime
- vector_io
providers:
inference:
- provider_id: passthrough
provider_type: remote::passthrough
config:
url: ${env.PASSTHROUGH_URL}
api_key: ${env.PASSTHROUGH_API_KEY}
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
config: {}
vector_io:
- provider_id: faiss
provider_type: inline::faiss
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/faiss_store.db
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config: {}
- provider_id: llama-guard-vision
provider_type: inline::llama-guard
config: {}
- provider_id: code-scanner
provider_type: inline::code-scanner
config: {}
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/agents_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/passthrough/trace_store.db}
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/localfs_datasetio.db
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
config: {}
- provider_id: braintrust
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:}
max_results: 3
- provider_id: wolfram-alpha
provider_type: remote::wolfram-alpha
config:
api_key: ${env.WOLFRAM_ALPHA_API_KEY:}
- provider_id: code-interpreter
provider_type: inline::code-interpreter
config: {}
- provider_id: rag-runtime
provider_type: inline::rag-runtime
config: {}
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
config: {}
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/registry.db
models:
- metadata: {}
model_id: meta-llama/Llama-3.1-8B-Instruct
provider_id: passthrough
provider_model_id: llama3.1-8b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
provider_id: passthrough
provider_model_id: llama3.2-11b-vision-instruct
model_type: llm
- metadata:
embedding_dimension: 384
model_id: all-MiniLM-L6-v2
provider_id: sentence-transformers
model_type: embedding
shields:
- shield_id: meta-llama/Llama-Guard-3-8B
provider_id: llama-guard
- shield_id: meta-llama/Llama-Guard-3-11B-Vision
provider_id: llama-guard-vision
- shield_id: CodeScanner
provider_id: code-scanner
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::wolfram_alpha
provider_id: wolfram-alpha
- toolgroup_id: builtin::rag
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321

View file

@ -31,7 +31,8 @@ providers:
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
    config: {}
    config:
      excluded_categories: []
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -50,14 +51,26 @@ providers:
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config: {}
    config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/meta_reference_eval.db
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
    config: {}
    config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/huggingface_datasetio.db
  - provider_id: localfs
    provider_type: inline::localfs
    config: {}
    config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/localfs_datasetio.db
  scoring:
  - provider_id: basic
    provider_type: inline::basic
@ -80,6 +93,10 @@ providers:
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:}
      max_results: 3
- provider_id: wolfram-alpha
provider_type: remote::wolfram-alpha
config:
api_key: ${env.WOLFRAM_ALPHA_API_KEY:}
  - provider_id: code-interpreter
    provider_type: inline::code-interpreter
    config: {}
@ -91,7 +108,7 @@ providers:
    config: {}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-llama}/registry.db
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/registry.db
models:
- metadata: {}
  model_id: meta-llama/Llama-3.1-8B-Instruct
@ -103,15 +120,22 @@ models:
  provider_id: passthrough
  provider_model_id: llama3.2-11b-vision-instruct
  model_type: llm
- metadata:
embedding_dimension: 384
model_id: all-MiniLM-L6-v2
provider_id: sentence-transformers
model_type: embedding
shields:
- shield_id: meta-llama/Llama-Guard-3-8B
vector_dbs: []
datasets: []
scoring_fns: []
eval_tasks: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
  provider_id: tavily-search
- toolgroup_id: builtin::wolfram_alpha
provider_id: wolfram-alpha
- toolgroup_id: builtin::rag
  provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter

View file

@ -124,14 +124,15 @@ exclude = [
[tool.ruff.lint]
select = [
    "B", # flake8-bugbear
    "B9", # flake8-bugbear subset
    "C", # comprehensions
    "E", # pycodestyle
    "F", # Pyflakes
    "N", # Naming
    "W", # Warnings
    "I", # isort
"DTZ", # datetime rules
]
ignore = [
    # The following ignores are desired by the project maintainers.
@ -145,6 +146,10 @@ ignore = [
"C901", # Complexity of the function is too high "C901", # Complexity of the function is too high
] ]
# Ignore the following errors for the following files
[tool.ruff.lint.per-file-ignores]
"tests/**/*.py" = ["DTZ"] # Ignore datetime rules for tests
[tool.mypy]
mypy_path = ["llama_stack"]
packages = ["llama_stack"]
@ -170,6 +175,10 @@ exclude = [
"^llama_stack/apis/inspect/inspect\\.py$", "^llama_stack/apis/inspect/inspect\\.py$",
"^llama_stack/apis/models/models\\.py$", "^llama_stack/apis/models/models\\.py$",
"^llama_stack/apis/post_training/post_training\\.py$", "^llama_stack/apis/post_training/post_training\\.py$",
    "^llama_stack/apis/providers/providers\\.py$",
"^llama_stack/apis/resource\\.py$", "^llama_stack/apis/resource\\.py$",
"^llama_stack/apis/safety/safety\\.py$", "^llama_stack/apis/safety/safety\\.py$",
"^llama_stack/apis/scoring/scoring\\.py$", "^llama_stack/apis/scoring/scoring\\.py$",

View file

@ -12,7 +12,7 @@ distro==1.9.0
exceptiongroup==1.2.2 ; python_full_version < '3.11'
filelock==3.17.0
fire==0.7.0
fsspec==2025.2.0
fsspec==2024.12.0
h11==0.14.0
httpcore==1.0.7
httpx==0.28.1

19
scripts/unit-tests.sh Executable file
View file

@ -0,0 +1,19 @@
#!/bin/sh
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
PYTHON_VERSION=${PYTHON_VERSION:-3.10}
command -v uv >/dev/null 2>&1 || { echo >&2 "uv is required but it's not installed. Exiting."; exit 1; }
uv python find $PYTHON_VERSION
FOUND_PYTHON=$?
if [ $FOUND_PYTHON -ne 0 ]; then
uv python install $PYTHON_VERSION
fi
uv run --python $PYTHON_VERSION --with-editable . --with-editable ".[dev]" --with-editable ".[unit]" pytest -s -v tests/unit/ $@

View file

@ -10,8 +10,7 @@ from uuid import uuid4
import pytest
from llama_stack_client.lib.agents.agent import Agent
from llama_stack_client.lib.agents.event_logger import EventLogger
from llama_stack_client.types.agents.turn_create_params import Document as AgentDocument
from llama_stack_client.types.agents.turn_create_params import Document
from llama_stack_client.types.memory_insert_params import Document
from llama_stack_client.types.shared_params.agent_config import AgentConfig, ToolConfig
from llama_stack.apis.agents.agents import (
@ -242,7 +241,7 @@ def test_code_interpreter_for_attachments(llama_stack_client_with_mocked_inferen
    codex_agent = Agent(llama_stack_client_with_mocked_inference, **agent_config)
    session_id = codex_agent.create_session(f"test-session-{uuid4()}")
    inflation_doc = AgentDocument(
    inflation_doc = Document(
        content="https://raw.githubusercontent.com/meta-llama/llama-stack-apps/main/examples/resources/inflation.csv",
        mime_type="text/csv",
    )

View file

@ -9,11 +9,31 @@ import mimetypes
import os
from pathlib import Path
import pytest
# How to run this test:
#
# LLAMA_STACK_CONFIG="template-name" pytest -v tests/integration/datasetio
@pytest.fixture
def dataset_for_test(llama_stack_client):
dataset_id = "test_dataset"
register_dataset(llama_stack_client, dataset_id=dataset_id)
yield
# Teardown - this always runs, even if the test fails
try:
llama_stack_client.datasets.unregister(dataset_id)
except Exception as e:
print(f"Warning: Failed to unregister test_dataset: {e}")
def data_url_from_file(file_path: str) -> str:
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")
@ -80,8 +100,12 @@ def test_register_unregister_dataset(llama_stack_client):
    assert len(response) == 0
def test_get_rows_paginated(llama_stack_client, dataset_for_test):
    response = llama_stack_client.datasetio.get_rows_paginated(
        dataset_id="test_dataset",
        rows_in_page=3,

View file

@ -52,6 +52,8 @@ def llama_stack_client_with_mocked_inference(llama_stack_client, request):
    If --record-responses is passed, it will call the real APIs and record the responses.
    """
# TODO: will rework this to be more stable
return llama_stack_client
    if not isinstance(llama_stack_client, LlamaStackAsLibraryClient):
        logging.warning(
            "llama_stack_client_with_mocked_inference is not supported for this client, returning original client without mocking"

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -36,7 +36,7 @@ def test_image_chat_completion_non_streaming(client_with_models, vision_model_id
"type": "image", "type": "image",
"image": { "image": {
"url": { "url": {
"uri": "https://raw.githubusercontent.com/meta-llama/llama-stack/main/tests/api/inference/dog.png" "uri": "https://raw.githubusercontent.com/meta-llama/llama-stack/main/tests/integration/inference/dog.png"
}, },
}, },
}, },
@ -65,7 +65,7 @@ def test_image_chat_completion_streaming(client_with_models, vision_model_id):
"type": "image", "type": "image",
"image": { "image": {
"url": { "url": {
"uri": "https://raw.githubusercontent.com/meta-llama/llama-stack/main/tests/api/inference/dog.png" "uri": "https://raw.githubusercontent.com/meta-llama/llama-stack/main/tests/integration/inference/dog.png"
}, },
}, },
}, },

View file

@ -0,0 +1,5 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

View file

@ -0,0 +1,17 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import pytest
from llama_stack_client import LlamaStackClient
from llama_stack import LlamaStackAsLibraryClient
class TestProviders:
@pytest.mark.asyncio
def test_list(self, llama_stack_client: LlamaStackAsLibraryClient | LlamaStackClient):
provider_list = llama_stack_client.providers.list()
assert provider_list is not None
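The new providers test only exercises `providers.list()` through the shared `llama_stack_client` fixture. A standalone sketch of the same call against a running server (the base URL is an assumption; the server section of the run configs in this commit defaults to port 8321):

```python
from llama_stack_client import LlamaStackClient

# Assumed local server address; point this at wherever the stack server runs.
client = LlamaStackClient(base_url="http://localhost:8321")

for provider in client.providers.list():
    print(provider)
```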

View file

@ -10,6 +10,19 @@ import pytest
from ..datasetio.test_datasetio import register_dataset
@pytest.fixture
def rag_dataset_for_test(llama_stack_client):
dataset_id = "test_dataset"
register_dataset(llama_stack_client, for_rag=True, dataset_id=dataset_id)
yield # This is where the test function will run
# Teardown - this always runs, even if the test fails
try:
llama_stack_client.datasets.unregister(dataset_id)
except Exception as e:
print(f"Warning: Failed to unregister test_dataset: {e}")
@pytest.fixture
def sample_judge_prompt_template():
    return "Output a number response in the following format: Score: <number>, where <number> is the number between 0 and 9."
@ -79,9 +92,7 @@ def test_scoring_functions_register(
# TODO: add unregister api for scoring functions
def test_scoring_score(llama_stack_client):
def test_scoring_score(llama_stack_client, rag_dataset_for_test):
    register_dataset(llama_stack_client, for_rag=True)
    # scoring individual rows
    rows = llama_stack_client.datasetio.get_rows_paginated(
        dataset_id="test_dataset",
@ -115,9 +126,9 @@ def test_scoring_score(llama_stack_client):
        assert len(response.results[x].score_rows) == 5
def test_scoring_score_with_params_llm_as_judge(llama_stack_client, sample_judge_prompt_template, judge_model_id):
    register_dataset(llama_stack_client, for_rag=True)
def test_scoring_score_with_params_llm_as_judge(
    llama_stack_client, sample_judge_prompt_template, judge_model_id, rag_dataset_for_test
):
    # scoring individual rows
    rows = llama_stack_client.datasetio.get_rows_paginated(
        dataset_id="test_dataset",
@ -167,9 +178,8 @@ def test_scoring_score_with_params_llm_as_judge(llama_stack_client, sample_judge
    ],
)
def test_scoring_score_with_aggregation_functions(
    llama_stack_client, sample_judge_prompt_template, judge_model_id, provider_id
    llama_stack_client, sample_judge_prompt_template, judge_model_id, provider_id, rag_dataset_for_test
):
    register_dataset(llama_stack_client, for_rag=True)
    rows = llama_stack_client.datasetio.get_rows_paginated(
        dataset_id="test_dataset",
        rows_in_page=3,

81
uv.lock generated
View file

@ -701,6 +701,7 @@ sdist = { url = "https://files.pythonhosted.org/packages/6b/b6/82c7e601d6d3c3278
[[package]]
name = "frozenlist"
version = "1.5.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/8f/ed/0f4cec13a93c02c47ec32d81d11c0c1efbadf4a471e3f3ce7cad366cbbd3/frozenlist-1.5.0.tar.gz", hash = "sha256:81d5af29e61b9c8348e876d442253723928dce6433e0e76cd925cd83f1b4b817", size = 39930 }
wheels = [
@ -770,10 +771,86 @@ wheels = [
[[package]]
name = "fsspec"
version = "2025.2.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/b5/79/68612ed99700e6413de42895aa725463e821a6b3be75c87fcce1b4af4c70/fsspec-2025.2.0.tar.gz", hash = "sha256:1c24b16eaa0a1798afa0337aa0db9b256718ab2a89c425371f5628d22c3b6afd", size = 292283 }
sdist = { url = "https://files.pythonhosted.org/packages/8f/ed/0f4cec13a93c02c47ec32d81d11c0c1efbadf4a471e3f3ce7cad366cbbd3/frozenlist-1.5.0.tar.gz", hash = "sha256:81d5af29e61b9c8348e876d442253723928dce6433e0e76cd925cd83f1b4b817", size = 39930 }
wheels = [
    { url = "https://files.pythonhosted.org/packages/e2/94/758680531a00d06e471ef649e4ec2ed6bf185356a7f9fbfbb7368a40bd49/fsspec-2025.2.0-py3-none-any.whl", hash = "sha256:9de2ad9ce1f85e1931858535bc882543171d197001a0a5eb2ddc04f1781ab95b", size = 184484 },
    { url = "https://files.pythonhosted.org/packages/54/79/29d44c4af36b2b240725dce566b20f63f9b36ef267aaaa64ee7466f4f2f8/frozenlist-1.5.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5b6a66c18b5b9dd261ca98dffcb826a525334b2f29e7caa54e182255c5f6a65a", size = 94451 },
{ url = "https://files.pythonhosted.org/packages/47/47/0c999aeace6ead8a44441b4f4173e2261b18219e4ad1fe9a479871ca02fc/frozenlist-1.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d1b3eb7b05ea246510b43a7e53ed1653e55c2121019a97e60cad7efb881a97bb", size = 54301 },
{ url = "https://files.pythonhosted.org/packages/8d/60/107a38c1e54176d12e06e9d4b5d755b677d71d1219217cee063911b1384f/frozenlist-1.5.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:15538c0cbf0e4fa11d1e3a71f823524b0c46299aed6e10ebb4c2089abd8c3bec", size = 52213 },
{ url = "https://files.pythonhosted.org/packages/17/62/594a6829ac5679c25755362a9dc93486a8a45241394564309641425d3ff6/frozenlist-1.5.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e79225373c317ff1e35f210dd5f1344ff31066ba8067c307ab60254cd3a78ad5", size = 240946 },
{ url = "https://files.pythonhosted.org/packages/7e/75/6c8419d8f92c80dd0ee3f63bdde2702ce6398b0ac8410ff459f9b6f2f9cb/frozenlist-1.5.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9272fa73ca71266702c4c3e2d4a28553ea03418e591e377a03b8e3659d94fa76", size = 264608 },
{ url = "https://files.pythonhosted.org/packages/88/3e/82a6f0b84bc6fb7e0be240e52863c6d4ab6098cd62e4f5b972cd31e002e8/frozenlist-1.5.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:498524025a5b8ba81695761d78c8dd7382ac0b052f34e66939c42df860b8ff17", size = 261361 },
{ url = "https://files.pythonhosted.org/packages/fd/85/14e5f9ccac1b64ff2f10c927b3ffdf88772aea875882406f9ba0cec8ad84/frozenlist-1.5.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:92b5278ed9d50fe610185ecd23c55d8b307d75ca18e94c0e7de328089ac5dcba", size = 231649 },
{ url = "https://files.pythonhosted.org/packages/ee/59/928322800306f6529d1852323014ee9008551e9bb027cc38d276cbc0b0e7/frozenlist-1.5.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f3c8c1dacd037df16e85227bac13cca58c30da836c6f936ba1df0c05d046d8d", size = 241853 },
{ url = "https://files.pythonhosted.org/packages/7d/bd/e01fa4f146a6f6c18c5d34cab8abdc4013774a26c4ff851128cd1bd3008e/frozenlist-1.5.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f2ac49a9bedb996086057b75bf93538240538c6d9b38e57c82d51f75a73409d2", size = 243652 },
{ url = "https://files.pythonhosted.org/packages/a5/bd/e4771fd18a8ec6757033f0fa903e447aecc3fbba54e3630397b61596acf0/frozenlist-1.5.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e66cc454f97053b79c2ab09c17fbe3c825ea6b4de20baf1be28919460dd7877f", size = 241734 },
{ url = "https://files.pythonhosted.org/packages/21/13/c83821fa5544af4f60c5d3a65d054af3213c26b14d3f5f48e43e5fb48556/frozenlist-1.5.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:5a3ba5f9a0dfed20337d3e966dc359784c9f96503674c2faf015f7fe8e96798c", size = 260959 },
{ url = "https://files.pythonhosted.org/packages/71/f3/1f91c9a9bf7ed0e8edcf52698d23f3c211d8d00291a53c9f115ceb977ab1/frozenlist-1.5.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:6321899477db90bdeb9299ac3627a6a53c7399c8cd58d25da094007402b039ab", size = 262706 },
{ url = "https://files.pythonhosted.org/packages/4c/22/4a256fdf5d9bcb3ae32622c796ee5ff9451b3a13a68cfe3f68e2c95588ce/frozenlist-1.5.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:76e4753701248476e6286f2ef492af900ea67d9706a0155335a40ea21bf3b2f5", size = 250401 },
{ url = "https://files.pythonhosted.org/packages/af/89/c48ebe1f7991bd2be6d5f4ed202d94960c01b3017a03d6954dd5fa9ea1e8/frozenlist-1.5.0-cp310-cp310-win32.whl", hash = "sha256:977701c081c0241d0955c9586ffdd9ce44f7a7795df39b9151cd9a6fd0ce4cfb", size = 45498 },
{ url = "https://files.pythonhosted.org/packages/28/2f/cc27d5f43e023d21fe5c19538e08894db3d7e081cbf582ad5ed366c24446/frozenlist-1.5.0-cp310-cp310-win_amd64.whl", hash = "sha256:189f03b53e64144f90990d29a27ec4f7997d91ed3d01b51fa39d2dbe77540fd4", size = 51622 },
{ url = "https://files.pythonhosted.org/packages/79/43/0bed28bf5eb1c9e4301003b74453b8e7aa85fb293b31dde352aac528dafc/frozenlist-1.5.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:fd74520371c3c4175142d02a976aee0b4cb4a7cc912a60586ffd8d5929979b30", size = 94987 },
{ url = "https://files.pythonhosted.org/packages/bb/bf/b74e38f09a246e8abbe1e90eb65787ed745ccab6eaa58b9c9308e052323d/frozenlist-1.5.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2f3f7a0fbc219fb4455264cae4d9f01ad41ae6ee8524500f381de64ffaa077d5", size = 54584 },
{ url = "https://files.pythonhosted.org/packages/2c/31/ab01375682f14f7613a1ade30149f684c84f9b8823a4391ed950c8285656/frozenlist-1.5.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f47c9c9028f55a04ac254346e92977bf0f166c483c74b4232bee19a6697e4778", size = 52499 },
{ url = "https://files.pythonhosted.org/packages/98/a8/d0ac0b9276e1404f58fec3ab6e90a4f76b778a49373ccaf6a563f100dfbc/frozenlist-1.5.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0996c66760924da6e88922756d99b47512a71cfd45215f3570bf1e0b694c206a", size = 276357 },
{ url = "https://files.pythonhosted.org/packages/ad/c9/c7761084fa822f07dac38ac29f841d4587570dd211e2262544aa0b791d21/frozenlist-1.5.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a2fe128eb4edeabe11896cb6af88fca5346059f6c8d807e3b910069f39157869", size = 287516 },
{ url = "https://files.pythonhosted.org/packages/a1/ff/cd7479e703c39df7bdab431798cef89dc75010d8aa0ca2514c5b9321db27/frozenlist-1.5.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1a8ea951bbb6cacd492e3948b8da8c502a3f814f5d20935aae74b5df2b19cf3d", size = 283131 },
{ url = "https://files.pythonhosted.org/packages/59/a0/370941beb47d237eca4fbf27e4e91389fd68699e6f4b0ebcc95da463835b/frozenlist-1.5.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:de537c11e4aa01d37db0d403b57bd6f0546e71a82347a97c6a9f0dcc532b3a45", size = 261320 },
{ url = "https://files.pythonhosted.org/packages/b8/5f/c10123e8d64867bc9b4f2f510a32042a306ff5fcd7e2e09e5ae5100ee333/frozenlist-1.5.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c2623347b933fcb9095841f1cc5d4ff0b278addd743e0e966cb3d460278840d", size = 274877 },
{ url = "https://files.pythonhosted.org/packages/fa/79/38c505601ae29d4348f21706c5d89755ceded02a745016ba2f58bd5f1ea6/frozenlist-1.5.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:cee6798eaf8b1416ef6909b06f7dc04b60755206bddc599f52232606e18179d3", size = 269592 },
{ url = "https://files.pythonhosted.org/packages/19/e2/39f3a53191b8204ba9f0bb574b926b73dd2efba2a2b9d2d730517e8f7622/frozenlist-1.5.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:f5f9da7f5dbc00a604fe74aa02ae7c98bcede8a3b8b9666f9f86fc13993bc71a", size = 265934 },
{ url = "https://files.pythonhosted.org/packages/d5/c9/3075eb7f7f3a91f1a6b00284af4de0a65a9ae47084930916f5528144c9dd/frozenlist-1.5.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:90646abbc7a5d5c7c19461d2e3eeb76eb0b204919e6ece342feb6032c9325ae9", size = 283859 },
{ url = "https://files.pythonhosted.org/packages/05/f5/549f44d314c29408b962fa2b0e69a1a67c59379fb143b92a0a065ffd1f0f/frozenlist-1.5.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:bdac3c7d9b705d253b2ce370fde941836a5f8b3c5c2b8fd70940a3ea3af7f4f2", size = 287560 },
{ url = "https://files.pythonhosted.org/packages/9d/f8/cb09b3c24a3eac02c4c07a9558e11e9e244fb02bf62c85ac2106d1eb0c0b/frozenlist-1.5.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:03d33c2ddbc1816237a67f66336616416e2bbb6beb306e5f890f2eb22b959cdf", size = 277150 },
{ url = "https://files.pythonhosted.org/packages/37/48/38c2db3f54d1501e692d6fe058f45b6ad1b358d82cd19436efab80cfc965/frozenlist-1.5.0-cp311-cp311-win32.whl", hash = "sha256:237f6b23ee0f44066219dae14c70ae38a63f0440ce6750f868ee08775073f942", size = 45244 },
{ url = "https://files.pythonhosted.org/packages/ca/8c/2ddffeb8b60a4bce3b196c32fcc30d8830d4615e7b492ec2071da801b8ad/frozenlist-1.5.0-cp311-cp311-win_amd64.whl", hash = "sha256:0cc974cc93d32c42e7b0f6cf242a6bd941c57c61b618e78b6c0a96cb72788c1d", size = 51634 },
{ url = "https://files.pythonhosted.org/packages/79/73/fa6d1a96ab7fd6e6d1c3500700963eab46813847f01ef0ccbaa726181dd5/frozenlist-1.5.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:31115ba75889723431aa9a4e77d5f398f5cf976eea3bdf61749731f62d4a4a21", size = 94026 },
{ url = "https://files.pythonhosted.org/packages/ab/04/ea8bf62c8868b8eada363f20ff1b647cf2e93377a7b284d36062d21d81d1/frozenlist-1.5.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7437601c4d89d070eac8323f121fcf25f88674627505334654fd027b091db09d", size = 54150 },
{ url = "https://files.pythonhosted.org/packages/d0/9a/8e479b482a6f2070b26bda572c5e6889bb3ba48977e81beea35b5ae13ece/frozenlist-1.5.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7948140d9f8ece1745be806f2bfdf390127cf1a763b925c4a805c603df5e697e", size = 51927 },
{ url = "https://files.pythonhosted.org/packages/e3/12/2aad87deb08a4e7ccfb33600871bbe8f0e08cb6d8224371387f3303654d7/frozenlist-1.5.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:feeb64bc9bcc6b45c6311c9e9b99406660a9c05ca8a5b30d14a78555088b0b3a", size = 282647 },
{ url = "https://files.pythonhosted.org/packages/77/f2/07f06b05d8a427ea0060a9cef6e63405ea9e0d761846b95ef3fb3be57111/frozenlist-1.5.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:683173d371daad49cffb8309779e886e59c2f369430ad28fe715f66d08d4ab1a", size = 289052 },
{ url = "https://files.pythonhosted.org/packages/bd/9f/8bf45a2f1cd4aa401acd271b077989c9267ae8463e7c8b1eb0d3f561b65e/frozenlist-1.5.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7d57d8f702221405a9d9b40f9da8ac2e4a1a8b5285aac6100f3393675f0a85ee", size = 291719 },
{ url = "https://files.pythonhosted.org/packages/41/d1/1f20fd05a6c42d3868709b7604c9f15538a29e4f734c694c6bcfc3d3b935/frozenlist-1.5.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:30c72000fbcc35b129cb09956836c7d7abf78ab5416595e4857d1cae8d6251a6", size = 267433 },
{ url = "https://files.pythonhosted.org/packages/af/f2/64b73a9bb86f5a89fb55450e97cd5c1f84a862d4ff90d9fd1a73ab0f64a5/frozenlist-1.5.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:000a77d6034fbad9b6bb880f7ec073027908f1b40254b5d6f26210d2dab1240e", size = 283591 },
{ url = "https://files.pythonhosted.org/packages/29/e2/ffbb1fae55a791fd6c2938dd9ea779509c977435ba3940b9f2e8dc9d5316/frozenlist-1.5.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5d7f5a50342475962eb18b740f3beecc685a15b52c91f7d975257e13e029eca9", size = 273249 },
{ url = "https://files.pythonhosted.org/packages/2e/6e/008136a30798bb63618a114b9321b5971172a5abddff44a100c7edc5ad4f/frozenlist-1.5.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:87f724d055eb4785d9be84e9ebf0f24e392ddfad00b3fe036e43f489fafc9039", size = 271075 },
{ url = "https://files.pythonhosted.org/packages/ae/f0/4e71e54a026b06724cec9b6c54f0b13a4e9e298cc8db0f82ec70e151f5ce/frozenlist-1.5.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:6e9080bb2fb195a046e5177f10d9d82b8a204c0736a97a153c2466127de87784", size = 285398 },
{ url = "https://files.pythonhosted.org/packages/4d/36/70ec246851478b1c0b59f11ef8ade9c482ff447c1363c2bd5fad45098b12/frozenlist-1.5.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:9b93d7aaa36c966fa42efcaf716e6b3900438632a626fb09c049f6a2f09fc631", size = 294445 },
{ url = "https://files.pythonhosted.org/packages/37/e0/47f87544055b3349b633a03c4d94b405956cf2437f4ab46d0928b74b7526/frozenlist-1.5.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:52ef692a4bc60a6dd57f507429636c2af8b6046db8b31b18dac02cbc8f507f7f", size = 280569 },
{ url = "https://files.pythonhosted.org/packages/f9/7c/490133c160fb6b84ed374c266f42800e33b50c3bbab1652764e6e1fc498a/frozenlist-1.5.0-cp312-cp312-win32.whl", hash = "sha256:29d94c256679247b33a3dc96cce0f93cbc69c23bf75ff715919332fdbb6a32b8", size = 44721 },
{ url = "https://files.pythonhosted.org/packages/b1/56/4e45136ffc6bdbfa68c29ca56ef53783ef4c2fd395f7cbf99a2624aa9aaa/frozenlist-1.5.0-cp312-cp312-win_amd64.whl", hash = "sha256:8969190d709e7c48ea386db202d708eb94bdb29207a1f269bab1196ce0dcca1f", size = 51329 },
{ url = "https://files.pythonhosted.org/packages/da/3b/915f0bca8a7ea04483622e84a9bd90033bab54bdf485479556c74fd5eaf5/frozenlist-1.5.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:7a1a048f9215c90973402e26c01d1cff8a209e1f1b53f72b95c13db61b00f953", size = 91538 },
{ url = "https://files.pythonhosted.org/packages/c7/d1/a7c98aad7e44afe5306a2b068434a5830f1470675f0e715abb86eb15f15b/frozenlist-1.5.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:dd47a5181ce5fcb463b5d9e17ecfdb02b678cca31280639255ce9d0e5aa67af0", size = 52849 },
{ url = "https://files.pythonhosted.org/packages/3a/c8/76f23bf9ab15d5f760eb48701909645f686f9c64fbb8982674c241fbef14/frozenlist-1.5.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1431d60b36d15cda188ea222033eec8e0eab488f39a272461f2e6d9e1a8e63c2", size = 50583 },
{ url = "https://files.pythonhosted.org/packages/1f/22/462a3dd093d11df623179d7754a3b3269de3b42de2808cddef50ee0f4f48/frozenlist-1.5.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6482a5851f5d72767fbd0e507e80737f9c8646ae7fd303def99bfe813f76cf7f", size = 265636 },
{ url = "https://files.pythonhosted.org/packages/80/cf/e075e407fc2ae7328155a1cd7e22f932773c8073c1fc78016607d19cc3e5/frozenlist-1.5.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:44c49271a937625619e862baacbd037a7ef86dd1ee215afc298a417ff3270608", size = 270214 },
{ url = "https://files.pythonhosted.org/packages/a1/58/0642d061d5de779f39c50cbb00df49682832923f3d2ebfb0fedf02d05f7f/frozenlist-1.5.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:12f78f98c2f1c2429d42e6a485f433722b0061d5c0b0139efa64f396efb5886b", size = 273905 },
{ url = "https://files.pythonhosted.org/packages/ab/66/3fe0f5f8f2add5b4ab7aa4e199f767fd3b55da26e3ca4ce2cc36698e50c4/frozenlist-1.5.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce3aa154c452d2467487765e3adc730a8c153af77ad84096bc19ce19a2400840", size = 250542 },
{ url = "https://files.pythonhosted.org/packages/f6/b8/260791bde9198c87a465224e0e2bb62c4e716f5d198fc3a1dacc4895dbd1/frozenlist-1.5.0-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9b7dc0c4338e6b8b091e8faf0db3168a37101943e687f373dce00959583f7439", size = 267026 },
{ url = "https://files.pythonhosted.org/packages/2e/a4/3d24f88c527f08f8d44ade24eaee83b2627793fa62fa07cbb7ff7a2f7d42/frozenlist-1.5.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:45e0896250900b5aa25180f9aec243e84e92ac84bd4a74d9ad4138ef3f5c97de", size = 257690 },
{ url = "https://files.pythonhosted.org/packages/de/9a/d311d660420b2beeff3459b6626f2ab4fb236d07afbdac034a4371fe696e/frozenlist-1.5.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:561eb1c9579d495fddb6da8959fd2a1fca2c6d060d4113f5844b433fc02f2641", size = 253893 },
{ url = "https://files.pythonhosted.org/packages/c6/23/e491aadc25b56eabd0f18c53bb19f3cdc6de30b2129ee0bc39cd387cd560/frozenlist-1.5.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:df6e2f325bfee1f49f81aaac97d2aa757c7646534a06f8f577ce184afe2f0a9e", size = 267006 },
{ url = "https://files.pythonhosted.org/packages/08/c4/ab918ce636a35fb974d13d666dcbe03969592aeca6c3ab3835acff01f79c/frozenlist-1.5.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:140228863501b44b809fb39ec56b5d4071f4d0aa6d216c19cbb08b8c5a7eadb9", size = 276157 },
{ url = "https://files.pythonhosted.org/packages/c0/29/3b7a0bbbbe5a34833ba26f686aabfe982924adbdcafdc294a7a129c31688/frozenlist-1.5.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7707a25d6a77f5d27ea7dc7d1fc608aa0a478193823f88511ef5e6b8a48f9d03", size = 264642 },
{ url = "https://files.pythonhosted.org/packages/ab/42/0595b3dbffc2e82d7fe658c12d5a5bafcd7516c6bf2d1d1feb5387caa9c1/frozenlist-1.5.0-cp313-cp313-win32.whl", hash = "sha256:31a9ac2b38ab9b5a8933b693db4939764ad3f299fcaa931a3e605bc3460e693c", size = 44914 },
{ url = "https://files.pythonhosted.org/packages/17/c4/b7db1206a3fea44bf3b838ca61deb6f74424a8a5db1dd53ecb21da669be6/frozenlist-1.5.0-cp313-cp313-win_amd64.whl", hash = "sha256:11aabdd62b8b9c4b84081a3c246506d1cddd2dd93ff0ad53ede5defec7886b28", size = 51167 },
{ url = "https://files.pythonhosted.org/packages/c6/c8/a5be5b7550c10858fcf9b0ea054baccab474da77d37f1e828ce043a3a5d4/frozenlist-1.5.0-py3-none-any.whl", hash = "sha256:d994863bba198a4a518b467bb971c56e1db3f180a25c6cf7bb1949c267f748c3", size = 11901 },
]
[[package]]
name = "fsspec"
version = "2024.12.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/ee/11/de70dee31455c546fbc88301971ec03c328f3d1138cfba14263f651e9551/fsspec-2024.12.0.tar.gz", hash = "sha256:670700c977ed2fb51e0d9f9253177ed20cbde4a3e5c0283cc5385b5870c8533f", size = 291600 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/de/86/5486b0188d08aa643e127774a99bac51ffa6cf343e3deb0583956dca5b22/fsspec-2024.12.0-py3-none-any.whl", hash = "sha256:b520aed47ad9804237ff878b504267a3b0b441e97508bd6d2d8774e3db85cee2", size = 183862 },
]
[package.optional-dependencies]
http = [
{ name = "aiohttp" },
]
[package.optional-dependencies]