Merge branch 'main' into patch-1

commit 92c2edd61c
Author: Dean Wampler, 2025-07-29 14:46:11 -04:00; committed by GitHub
GPG key ID: B5690EEEBB952194 (no known key found for this signature in database)

35 changed files with 1916 additions and 1589 deletions


@@ -14,10 +14,18 @@ concurrency:
jobs:
  pre-commit:
    runs-on: ubuntu-latest
+    permissions:
+      contents: write
+      pull-requests: write
    steps:
      - name: Checkout code
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          # For dependabot PRs, we need to checkout with a token that can push changes
+          token: ${{ github.actor == 'dependabot[bot]' && secrets.GITHUB_TOKEN || github.token }}
+          # Fetch full history for dependabot PRs to allow commits
+          fetch-depth: ${{ github.actor == 'dependabot[bot]' && 0 || 1 }}

      - name: Set up Python
        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
@@ -29,15 +37,45 @@ jobs:
          .pre-commit-config.yaml

      - uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
+        continue-on-error: true
        env:
          SKIP: no-commit-to-branch
          RUFF_OUTPUT_FORMAT: github

+      - name: Debug
+        run: |
+          echo "github.ref: ${{ github.ref }}"
+          echo "github.actor: ${{ github.actor }}"
+
+      - name: Commit changes for dependabot PRs
+        if: github.actor == 'dependabot[bot]'
+        run: |
+          if ! git diff --exit-code || [ -n "$(git ls-files --others --exclude-standard)" ]; then
+            git config --local user.email "github-actions[bot]@users.noreply.github.com"
+            git config --local user.name "github-actions[bot]"
+            # Ensure we're on the correct branch
+            git checkout -B ${{ github.head_ref }}
+            git add -A
+            git commit -m "Apply pre-commit fixes"
+            # Pull latest changes from the PR branch and rebase our commit on top
+            git pull --rebase origin ${{ github.head_ref }}
+            # Push to the PR branch
+            git push origin ${{ github.head_ref }}
+            echo "Pre-commit fixes committed and pushed"
+          else
+            echo "No changes to commit"
+          fi
+
      - name: Verify if there are any diff files after pre-commit
+        if: github.actor != 'dependabot[bot]'
        run: |
          git diff --exit-code || (echo "There are uncommitted changes, run pre-commit locally and commit again" && exit 1)

      - name: Verify if there are any new files after pre-commit
+        if: github.actor != 'dependabot[bot]'
        run: |
          unstaged_files=$(git ls-files --others --exclude-standard)
          if [ -n "$unstaged_files" ]; then


@@ -13,6 +13,7 @@ on:
      - 'uv.lock'
      - 'pyproject.toml'
      - 'requirements.txt'
+      - 'tests/external/*'
      - '.github/workflows/test-external-provider-module.yml' # This workflow

jobs:


@@ -13,6 +13,7 @@ on:
      - 'uv.lock'
      - 'pyproject.toml'
      - 'requirements.txt'
+      - 'tests/external/*'
      - '.github/workflows/test-external.yml' # This workflow

jobs:


@@ -35,6 +35,8 @@ jobs:
      - name: Install dependencies
        uses: ./.github/actions/setup-runner
+        with:
+          python-version: ${{ matrix.python }}
      - name: Run unit tests
        run: |


@@ -19,7 +19,6 @@ repos:
      - id: check-yaml
        args: ["--unsafe"]
      - id: detect-private-key
-      - id: requirements-txt-fixer
      - id: mixed-line-ending
        args: [--fix=lf] # Forces to replace line ending by LF (line feed)
      - id: check-executables-have-shebangs
@@ -56,14 +55,6 @@ repos:
    rev: 0.7.20
    hooks:
      - id: uv-lock
-      - id: uv-export
-        args: [
-          "--frozen",
-          "--no-hashes",
-          "--no-emit-project",
-          "--no-default-groups",
-          "--output-file=requirements.txt"
-        ]

  - repo: https://github.com/pre-commit/mirrors-mypy
    rev: v1.16.1


@@ -9770,7 +9770,7 @@
            {
              "type": "array",
              "items": {
-                "$ref": "#/components/schemas/OpenAIChatCompletionContentPartParam"
+                "$ref": "#/components/schemas/OpenAIChatCompletionContentPartTextParam"
              }
            }
          ],
@@ -9821,13 +9821,17 @@
        },
        {
          "$ref": "#/components/schemas/OpenAIChatCompletionContentPartImageParam"
+        },
+        {
+          "$ref": "#/components/schemas/OpenAIFile"
        }
      ],
      "discriminator": {
        "propertyName": "type",
        "mapping": {
          "text": "#/components/schemas/OpenAIChatCompletionContentPartTextParam",
-          "image_url": "#/components/schemas/OpenAIChatCompletionContentPartImageParam"
+          "image_url": "#/components/schemas/OpenAIChatCompletionContentPartImageParam",
+          "file": "#/components/schemas/OpenAIFile"
        }
      }
    },
@@ -9955,7 +9959,7 @@
            {
              "type": "array",
              "items": {
-                "$ref": "#/components/schemas/OpenAIChatCompletionContentPartParam"
+                "$ref": "#/components/schemas/OpenAIChatCompletionContentPartTextParam"
              }
            }
          ],
@@ -9974,6 +9978,41 @@
      "title": "OpenAIDeveloperMessageParam",
      "description": "A message from the developer in an OpenAI-compatible chat completion request."
    },
+    "OpenAIFile": {
+      "type": "object",
+      "properties": {
+        "type": {
+          "type": "string",
+          "const": "file",
+          "default": "file"
+        },
+        "file": {
+          "$ref": "#/components/schemas/OpenAIFileFile"
+        }
+      },
+      "additionalProperties": false,
+      "required": [
+        "type",
+        "file"
+      ],
+      "title": "OpenAIFile"
+    },
+    "OpenAIFileFile": {
+      "type": "object",
+      "properties": {
+        "file_data": {
+          "type": "string"
+        },
+        "file_id": {
+          "type": "string"
+        },
+        "filename": {
+          "type": "string"
+        }
+      },
+      "additionalProperties": false,
+      "title": "OpenAIFileFile"
+    },
    "OpenAIImageURL": {
      "type": "object",
      "properties": {
@@ -10036,7 +10075,7 @@
            {
              "type": "array",
              "items": {
-                "$ref": "#/components/schemas/OpenAIChatCompletionContentPartParam"
+                "$ref": "#/components/schemas/OpenAIChatCompletionContentPartTextParam"
              }
            }
          ],
@@ -10107,7 +10146,7 @@
            {
              "type": "array",
              "items": {
-                "$ref": "#/components/schemas/OpenAIChatCompletionContentPartParam"
+                "$ref": "#/components/schemas/OpenAIChatCompletionContentPartTextParam"
              }
            }
          ],


@@ -6895,7 +6895,7 @@ components:
          - type: string
          - type: array
            items:
-              $ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
+              $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
        description: The content of the model's response
      name:
        type: string
@@ -6934,11 +6934,13 @@ components:
      oneOf:
        - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
        - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
+        - $ref: '#/components/schemas/OpenAIFile'
      discriminator:
        propertyName: type
        mapping:
          text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
          image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
+          file: '#/components/schemas/OpenAIFile'
    OpenAIChatCompletionContentPartTextParam:
      type: object
      properties:
@@ -7037,7 +7039,7 @@ components:
          - type: string
          - type: array
            items:
-              $ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
+              $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
        description: The content of the developer message
      name:
        type: string
@@ -7050,6 +7052,31 @@ components:
      title: OpenAIDeveloperMessageParam
      description: >-
        A message from the developer in an OpenAI-compatible chat completion request.
+    OpenAIFile:
+      type: object
+      properties:
+        type:
+          type: string
+          const: file
+          default: file
+        file:
+          $ref: '#/components/schemas/OpenAIFileFile'
+      additionalProperties: false
+      required:
+        - type
+        - file
+      title: OpenAIFile
+    OpenAIFileFile:
+      type: object
+      properties:
+        file_data:
+          type: string
+        file_id:
+          type: string
+        filename:
+          type: string
+      additionalProperties: false
+      title: OpenAIFileFile
    OpenAIImageURL:
      type: object
      properties:
@@ -7090,7 +7117,7 @@ components:
          - type: string
          - type: array
            items:
-              $ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
+              $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
        description: >-
          The content of the "system prompt". If multiple system messages are provided,
          they are concatenated. The underlying Llama Stack code may also add other
@@ -7148,7 +7175,7 @@ components:
          - type: string
          - type: array
            items:
-              $ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
+              $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
        description: The response content from the tool
      additionalProperties: false
      required:


@@ -13,7 +13,7 @@ llama stack build --template starter --image-type venv
from llama_stack.distribution.library_client import LlamaStackAsLibraryClient

client = LlamaStackAsLibraryClient(
-    "ollama",
+    "starter",
    # provider_data is optional, but if you need to pass in any provider specific data, you can do so here.
    provider_data={"tavily_search_api_key": os.environ["TAVILY_SEARCH_API_KEY"]},
)


@@ -12,8 +12,7 @@ To enable external providers, you need to add `module` into your build yaml, all
an example entry in your build.yaml should look like:

```
-- provider_id: ramalama
-  provider_type: remote::ramalama
+- provider_type: remote::ramalama
  module: ramalama_stack
```

@@ -255,8 +254,7 @@ distribution_spec:
  container_image: null
  providers:
    inference:
-    - provider_id: ramalama
-      provider_type: remote::ramalama
+    - provider_type: remote::ramalama
      module: ramalama_stack==0.3.0a0
  image_type: venv
  image_name: null


@@ -9,11 +9,13 @@ OpenAI inference provider for accessing GPT models and other OpenAI services.
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | API key for OpenAI models |
+| `base_url` | `<class 'str'>` | No | https://api.openai.com/v1 | Base URL for OpenAI API |

## Sample Configuration

```yaml
api_key: ${env.OPENAI_API_KEY:=}
+base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1}
```


@@ -455,8 +455,21 @@ class OpenAIChatCompletionContentPartImageParam(BaseModel):
    image_url: OpenAIImageURL


+@json_schema_type
+class OpenAIFileFile(BaseModel):
+    file_data: str | None = None
+    file_id: str | None = None
+    filename: str | None = None
+
+
+@json_schema_type
+class OpenAIFile(BaseModel):
+    type: Literal["file"] = "file"
+    file: OpenAIFileFile
+
+
OpenAIChatCompletionContentPartParam = Annotated[
-    OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam,
+    OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile,
    Field(discriminator="type"),
]
register_schema(OpenAIChatCompletionContentPartParam, name="OpenAIChatCompletionContentPartParam")
@@ -464,6 +477,8 @@ register_schema(OpenAIChatCompletionContentPartParam, name="OpenAIChatCompletion

OpenAIChatCompletionMessageContent = str | list[OpenAIChatCompletionContentPartParam]

+OpenAIChatCompletionTextOnlyMessageContent = str | list[OpenAIChatCompletionContentPartTextParam]
+

@json_schema_type
class OpenAIUserMessageParam(BaseModel):
@@ -489,7 +504,7 @@ class OpenAISystemMessageParam(BaseModel):
    """

    role: Literal["system"] = "system"
-    content: OpenAIChatCompletionMessageContent
+    content: OpenAIChatCompletionTextOnlyMessageContent
    name: str | None = None

@@ -518,7 +533,7 @@ class OpenAIAssistantMessageParam(BaseModel):
    """

    role: Literal["assistant"] = "assistant"
-    content: OpenAIChatCompletionMessageContent | None = None
+    content: OpenAIChatCompletionTextOnlyMessageContent | None = None
    name: str | None = None
    tool_calls: list[OpenAIChatCompletionToolCall] | None = None

@@ -534,7 +549,7 @@ class OpenAIToolMessageParam(BaseModel):

    role: Literal["tool"] = "tool"
    tool_call_id: str
-    content: OpenAIChatCompletionMessageContent
+    content: OpenAIChatCompletionTextOnlyMessageContent


@json_schema_type
@@ -547,7 +562,7 @@ class OpenAIDeveloperMessageParam(BaseModel):
    """

    role: Literal["developer"] = "developer"
-    content: OpenAIChatCompletionMessageContent
+    content: OpenAIChatCompletionTextOnlyMessageContent
    name: str | None = None
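Taken together, these type changes mean only user messages can carry non-text parts. A minimal sketch of how the new file part composes with a user message (illustrative only; it assumes OpenAIFile and OpenAIFileFile are re-exported from llama_stack.apis.inference alongside the other message types, and the data URL is a placeholder):

```python
from llama_stack.apis.inference import (
    OpenAIChatCompletionContentPartTextParam,
    OpenAIFile,
    OpenAIFileFile,
    OpenAIUserMessageParam,
)

# A user message mixing a text part with the new base64-encoded file part.
message = OpenAIUserMessageParam(
    content=[
        OpenAIChatCompletionContentPartTextParam(text="Summarize the attached PDF."),
        OpenAIFile(file=OpenAIFileFile(filename="report.pdf", file_data="data:application/pdf;base64,...")),
    ]
)

# System, developer, tool, and assistant messages now use the text-only content
# alias, so a file or image part there fails Pydantic validation.
```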


@@ -18,10 +18,6 @@ UV_HTTP_TIMEOUT=${UV_HTTP_TIMEOUT:-500}
# mounting is not supported by docker buildx, so we use COPY instead
USE_COPY_NOT_MOUNT=${USE_COPY_NOT_MOUNT:-}

-# Mount command for cache container .cache, can be overridden by the user if needed
-MOUNT_CACHE=${MOUNT_CACHE:-"--mount=type=cache,id=llama-stack-cache,target=/root/.cache"}
-
# Path to the run.yaml file in the container
RUN_CONFIG_PATH=/app/run.yaml

@@ -176,18 +172,13 @@ RUN pip install uv
EOF
fi

-# Set the link mode to copy so that uv doesn't attempt to symlink to the cache directory
-add_to_container << EOF
-ENV UV_LINK_MODE=copy
-EOF
-
# Add pip dependencies first since llama-stack is what will change most often
# so we can reuse layers.
if [ -n "$normal_deps" ]; then
    read -ra pip_args <<< "$normal_deps"
    quoted_deps=$(printf " %q" "${pip_args[@]}")
    add_to_container << EOF
-RUN $MOUNT_CACHE uv pip install $quoted_deps
+RUN uv pip install --no-cache $quoted_deps
EOF
fi

@@ -197,7 +188,7 @@ if [ -n "$optional_deps" ]; then
        read -ra pip_args <<< "$part"
        quoted_deps=$(printf " %q" "${pip_args[@]}")
        add_to_container <<EOF
-RUN $MOUNT_CACHE uv pip install $quoted_deps
+RUN uv pip install --no-cache $quoted_deps
EOF
    done
fi

@@ -208,10 +199,10 @@ if [ -n "$external_provider_deps" ]; then
        read -ra pip_args <<< "$part"
        quoted_deps=$(printf " %q" "${pip_args[@]}")
        add_to_container <<EOF
-RUN $MOUNT_CACHE uv pip install $quoted_deps
+RUN uv pip install --no-cache $quoted_deps
EOF
        add_to_container <<EOF
-RUN python3 - <<PYTHON | $MOUNT_CACHE uv pip install -r -
+RUN python3 - <<PYTHON | uv pip install --no-cache -r -
import importlib
import sys

@@ -293,7 +284,7 @@ COPY $dir $mount_point
EOF
    fi
    add_to_container << EOF
-RUN $MOUNT_CACHE uv pip install -e $mount_point
+RUN uv pip install --no-cache -e $mount_point
EOF
}

@@ -308,10 +299,10 @@ else
    if [ -n "$TEST_PYPI_VERSION" ]; then
        # these packages are damaged in test-pypi, so install them first
        add_to_container << EOF
-RUN $MOUNT_CACHE uv pip install fastapi libcst
+RUN uv pip install --no-cache fastapi libcst
EOF
        add_to_container << EOF
-RUN $MOUNT_CACHE uv pip install --extra-index-url https://test.pypi.org/simple/ \
+RUN uv pip install --no-cache --extra-index-url https://test.pypi.org/simple/ \
    --index-strategy unsafe-best-match \
    llama-stack==$TEST_PYPI_VERSION

@@ -323,7 +314,7 @@ EOF
        SPEC_VERSION="llama-stack"
    fi
    add_to_container << EOF
-RUN $MOUNT_CACHE uv pip install $SPEC_VERSION
+RUN uv pip install --no-cache $SPEC_VERSION
EOF
    fi
fi


@@ -358,7 +358,7 @@ async def shutdown_stack(impls: dict[Api, Any]):

async def refresh_registry_once(impls: dict[Api, Any]):
-    logger.info("refreshing registry")
+    logger.debug("refreshing registry")
    routing_tables = [v for v in impls.values() if isinstance(v, CommonRoutingTableImpl)]
    for routing_table in routing_tables:
        await routing_table.refresh()


@@ -469,7 +469,7 @@ class HFFinetuningSingleDevice:
            use_cpu=True if device.type == "cpu" and not torch.backends.mps.is_available() else False,
            save_strategy=save_strategy,
            report_to="none",
-            max_seq_length=provider_config.max_seq_length,
+            max_length=provider_config.max_seq_length,
            gradient_accumulation_steps=config.gradient_accumulation_steps,
            gradient_checkpointing=provider_config.gradient_checkpointing,
            learning_rate=lr,


@@ -32,7 +32,7 @@ class LlamaCompatInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
        LiteLLMOpenAIMixin.__init__(
            self,
            model_entries=MODEL_ENTRIES,
-            litellm_provider_name="llama",
+            litellm_provider_name="meta_llama",
            api_key_from_config=config.api_key,
            provider_data_api_key_field="llama_api_key",
            openai_compat_api_base=config.openai_compat_api_base,


@@ -24,9 +24,19 @@ class OpenAIConfig(BaseModel):
        default=None,
        description="API key for OpenAI models",
    )
+    base_url: str = Field(
+        default="https://api.openai.com/v1",
+        description="Base URL for OpenAI API",
+    )

    @classmethod
-    def sample_run_config(cls, api_key: str = "${env.OPENAI_API_KEY:=}", **kwargs) -> dict[str, Any]:
+    def sample_run_config(
+        cls,
+        api_key: str = "${env.OPENAI_API_KEY:=}",
+        base_url: str = "${env.OPENAI_BASE_URL:=https://api.openai.com/v1}",
+        **kwargs,
+    ) -> dict[str, Any]:
        return {
            "api_key": api_key,
+            "base_url": base_url,
        }
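As a quick illustration of how the new field resolves at run time (a sketch based on the unit tests added later in this commit; replace_env_vars is the stack's existing env-substitution helper, and the proxy URL is a made-up example):

```python
import os

from llama_stack.distribution.stack import replace_env_vars
from llama_stack.providers.remote.inference.openai.config import OpenAIConfig

os.environ["OPENAI_BASE_URL"] = "https://proxy.example.com/v1"  # hypothetical proxy endpoint

# sample_run_config() emits ${env.OPENAI_BASE_URL:=https://api.openai.com/v1};
# replace_env_vars() substitutes the environment value, or the default when unset.
config = OpenAIConfig.model_validate(replace_env_vars(OpenAIConfig.sample_run_config(api_key="test-key")))
assert config.base_url == "https://proxy.example.com/v1"
```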


@@ -65,9 +65,9 @@ class OpenAIInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
        """
        Get the OpenAI API base URL.

-        Returns the standard OpenAI API base URL for direct OpenAI API calls.
+        Returns the OpenAI API base URL from the configuration.
        """
-        return "https://api.openai.com/v1"
+        return self.config.base_url

    async def initialize(self) -> None:
        await super().initialize()


@@ -73,6 +73,15 @@ class LiteLLMOpenAIMixin(
        provider_data_api_key_field: str,
        openai_compat_api_base: str | None = None,
    ):
+        """
+        Initialize the LiteLLMOpenAIMixin.
+
+        :param model_entries: The model entries to register.
+        :param api_key_from_config: The API key to use from the config.
+        :param provider_data_api_key_field: The field in the provider data that contains the API key.
+        :param litellm_provider_name: The name of the provider, used for model lookups.
+        :param openai_compat_api_base: The base URL for OpenAI compatibility, or None if not using OpenAI compatibility.
+        """
        ModelRegistryHelper.__init__(self, model_entries)

        self.litellm_provider_name = litellm_provider_name
@@ -428,3 +437,17 @@ class LiteLLMOpenAIMixin(
        logprobs: LogProbConfig | None = None,
    ):
        raise NotImplementedError("Batch chat completion is not supported for OpenAI Compat")
+
+    async def check_model_availability(self, model: str) -> bool:
+        """
+        Check if a specific model is available via LiteLLM for the current
+        provider (self.litellm_provider_name).
+
+        :param model: The model identifier to check.
+        :return: True if the model is available dynamically, False otherwise.
+        """
+        if self.litellm_provider_name not in litellm.models_by_provider:
+            logger.error(f"Provider {self.litellm_provider_name} is not registered in litellm.")
+            return False
+
+        return model in litellm.models_by_provider[self.litellm_provider_name]
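A short usage sketch for the new helper (hypothetical wrapper and adapter instance; any class deriving from LiteLLMOpenAIMixin, such as the Llama API adapter above, inherits the method):

```python
import asyncio


async def ensure_model(adapter, model_id: str) -> None:
    # check_model_availability() consults litellm.models_by_provider for
    # adapter.litellm_provider_name, so unknown providers or models return False.
    if not await adapter.check_model_availability(model_id):
        raise ValueError(f"{model_id} is not listed for provider {adapter.litellm_provider_name}")


# Example (assumes `adapter` was constructed elsewhere):
# asyncio.run(ensure_model(adapter, "gpt-4o"))
```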


@@ -56,6 +56,7 @@ providers:
    provider_type: remote::openai
    config:
      api_key: ${env.OPENAI_API_KEY:=}
+      base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1}
  - provider_id: anthropic
    provider_type: remote::anthropic
    config:


@@ -16,6 +16,7 @@ providers:
    provider_type: remote::openai
    config:
      api_key: ${env.OPENAI_API_KEY:=}
+      base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1}
  - provider_id: anthropic
    provider_type: remote::anthropic
    config:


@@ -56,6 +56,7 @@ providers:
    provider_type: remote::openai
    config:
      api_key: ${env.OPENAI_API_KEY:=}
+      base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1}
  - provider_id: anthropic
    provider_type: remote::anthropic
    config:


@@ -20,7 +20,7 @@
    "@radix-ui/react-tooltip": "^1.2.6",
    "class-variance-authority": "^0.7.1",
    "clsx": "^2.1.1",
-    "llama-stack-client": "^0.2.15",
+    "llama-stack-client": "^0.2.16",
    "lucide-react": "^0.510.0",
    "next": "15.3.3",
    "next-auth": "^4.24.11",


@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "llama_stack"
-version = "0.2.15"
+version = "0.2.16"
authors = [{ name = "Meta Llama", email = "llama-oss@meta.com" }]
description = "Llama Stack"
readme = "README.md"
@@ -28,7 +28,7 @@ dependencies = [
    "huggingface-hub>=0.34.0,<1.0",
    "jinja2>=3.1.6",
    "jsonschema",
-    "llama-stack-client>=0.2.15",
+    "llama-stack-client>=0.2.16",
    "llama-api-client>=0.1.2",
    "openai>=1.66",
    "prompt-toolkit",
@@ -53,7 +53,7 @@ dependencies = [
ui = [
    "streamlit",
    "pandas",
-    "llama-stack-client>=0.2.15",
+    "llama-stack-client>=0.2.16",
    "streamlit-option-menu",
]

@@ -114,6 +114,7 @@ test = [
    "sqlalchemy[asyncio]>=2.0.41",
    "requests",
    "pymilvus>=2.5.12",
+    "reportlab",
]
docs = [
    "setuptools",


@@ -1,269 +0,0 @@
# This file was autogenerated by uv via the following command:
# uv export --frozen --no-hashes --no-emit-project --no-default-groups --output-file=requirements.txt
aiohappyeyeballs==2.5.0
# via aiohttp
aiohttp==3.12.13
# via llama-stack
aiosignal==1.3.2
# via aiohttp
aiosqlite==0.21.0
# via llama-stack
annotated-types==0.7.0
# via pydantic
anyio==4.8.0
# via
# httpx
# llama-api-client
# llama-stack-client
# openai
# starlette
asyncpg==0.30.0
# via llama-stack
attrs==25.1.0
# via
# aiohttp
# jsonschema
# referencing
certifi==2025.1.31
# via
# httpcore
# httpx
# requests
cffi==1.17.1 ; platform_python_implementation != 'PyPy'
# via cryptography
charset-normalizer==3.4.1
# via requests
click==8.1.8
# via
# llama-stack-client
# uvicorn
colorama==0.4.6 ; sys_platform == 'win32'
# via
# click
# tqdm
cryptography==45.0.5
# via python-jose
deprecated==1.2.18
# via
# opentelemetry-api
# opentelemetry-exporter-otlp-proto-http
# opentelemetry-semantic-conventions
distro==1.9.0
# via
# llama-api-client
# llama-stack-client
# openai
ecdsa==0.19.1
# via python-jose
fastapi==0.115.8
# via llama-stack
filelock==3.17.0
# via huggingface-hub
fire==0.7.0
# via
# llama-stack
# llama-stack-client
frozenlist==1.5.0
# via
# aiohttp
# aiosignal
fsspec==2024.12.0
# via huggingface-hub
googleapis-common-protos==1.67.0
# via opentelemetry-exporter-otlp-proto-http
h11==0.16.0
# via
# httpcore
# llama-stack
# uvicorn
hf-xet==1.1.5 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
# via huggingface-hub
httpcore==1.0.9
# via httpx
httpx==0.28.1
# via
# llama-api-client
# llama-stack
# llama-stack-client
# openai
huggingface-hub==0.34.1
# via llama-stack
idna==3.10
# via
# anyio
# httpx
# requests
# yarl
importlib-metadata==8.5.0
# via opentelemetry-api
jinja2==3.1.6
# via llama-stack
jiter==0.8.2
# via openai
jsonschema==4.23.0
# via llama-stack
jsonschema-specifications==2024.10.1
# via jsonschema
llama-api-client==0.1.2
# via llama-stack
llama-stack-client==0.2.15
# via llama-stack
markdown-it-py==3.0.0
# via rich
markupsafe==3.0.2
# via jinja2
mdurl==0.1.2
# via markdown-it-py
multidict==6.1.0
# via
# aiohttp
# yarl
numpy==2.2.3
# via pandas
openai==1.71.0
# via llama-stack
opentelemetry-api==1.30.0
# via
# opentelemetry-exporter-otlp-proto-http
# opentelemetry-sdk
# opentelemetry-semantic-conventions
opentelemetry-exporter-otlp-proto-common==1.30.0
# via opentelemetry-exporter-otlp-proto-http
opentelemetry-exporter-otlp-proto-http==1.30.0
# via llama-stack
opentelemetry-proto==1.30.0
# via
# opentelemetry-exporter-otlp-proto-common
# opentelemetry-exporter-otlp-proto-http
opentelemetry-sdk==1.30.0
# via
# llama-stack
# opentelemetry-exporter-otlp-proto-http
opentelemetry-semantic-conventions==0.51b0
# via opentelemetry-sdk
packaging==24.2
# via huggingface-hub
pandas==2.2.3
# via llama-stack-client
pillow==11.1.0
# via llama-stack
prompt-toolkit==3.0.50
# via
# llama-stack
# llama-stack-client
propcache==0.3.0
# via
# aiohttp
# yarl
protobuf==5.29.5
# via
# googleapis-common-protos
# opentelemetry-proto
pyaml==25.1.0
# via llama-stack-client
pyasn1==0.4.8
# via
# python-jose
# rsa
pycparser==2.22 ; platform_python_implementation != 'PyPy'
# via cffi
pydantic==2.10.6
# via
# fastapi
# llama-api-client
# llama-stack
# llama-stack-client
# openai
pydantic-core==2.27.2
# via pydantic
pygments==2.19.1
# via rich
python-dateutil==2.9.0.post0
# via pandas
python-dotenv==1.0.1
# via llama-stack
python-jose==3.4.0
# via llama-stack
python-multipart==0.0.20
# via llama-stack
pytz==2025.1
# via pandas
pyyaml==6.0.2
# via
# huggingface-hub
# pyaml
referencing==0.36.2
# via
# jsonschema
# jsonschema-specifications
regex==2024.11.6
# via tiktoken
requests==2.32.4
# via
# huggingface-hub
# llama-stack-client
# opentelemetry-exporter-otlp-proto-http
# tiktoken
rich==13.9.4
# via
# llama-stack
# llama-stack-client
rpds-py==0.22.3
# via
# jsonschema
# referencing
rsa==4.9
# via python-jose
six==1.17.0
# via
# ecdsa
# python-dateutil
sniffio==1.3.1
# via
# anyio
# llama-api-client
# llama-stack-client
# openai
starlette==0.45.3
# via
# fastapi
# llama-stack
termcolor==2.5.0
# via
# fire
# llama-stack
# llama-stack-client
tiktoken==0.9.0
# via llama-stack
tqdm==4.67.1
# via
# huggingface-hub
# llama-stack-client
# openai
typing-extensions==4.12.2
# via
# aiosqlite
# anyio
# fastapi
# huggingface-hub
# llama-api-client
# llama-stack-client
# openai
# opentelemetry-sdk
# pydantic
# pydantic-core
# referencing
tzdata==2025.1
# via pandas
urllib3==2.5.0
# via requests
uvicorn==0.34.0
# via llama-stack
wcwidth==0.2.13
# via prompt-toolkit
wrapt==1.17.2
# via deprecated
yarl==1.18.3
# via aiohttp
zipp==3.21.0
# via importlib-metadata


@@ -8,6 +8,15 @@

PYTHON_VERSION=${PYTHON_VERSION:-3.12}

+set -e
+
+# Always run this at the end, even if something fails
+cleanup() {
+    echo "Generating coverage report..."
+    uv run --python "$PYTHON_VERSION" coverage html -d htmlcov-$PYTHON_VERSION
+}
+trap cleanup EXIT
+
command -v uv >/dev/null 2>&1 || { echo >&2 "uv is required but it's not installed. Exiting."; exit 1; }

uv python find "$PYTHON_VERSION"
@@ -19,6 +28,3 @@ fi
# Run unit tests with coverage
uv run --python "$PYTHON_VERSION" --with-editable . --group unit \
    coverage run --source=llama_stack -m pytest -s -v tests/unit/ "$@"
-
-# Generate HTML coverage report
-uv run --python "$PYTHON_VERSION" coverage html -d htmlcov-$PYTHON_VERSION


@@ -3,8 +3,7 @@ distribution_spec:
  description: Custom distro for CI tests
  providers:
    weather:
-    - provider_id: kaze
-      provider_type: remote::kaze
+    - provider_type: remote::kaze
  image_type: venv
  image_name: ci-test
  external_providers_dir: ~/.llama/providers.d


@@ -4,8 +4,7 @@ distribution_spec:
  container_image: null
  providers:
    inference:
-    - provider_id: ramalama
-      provider_type: remote::ramalama
+    - provider_type: remote::ramalama
      module: ramalama_stack==0.3.0a0
  image_type: venv
  image_name: ramalama-stack-test


@@ -5,8 +5,14 @@
# the root directory of this source tree.


+import base64
+import os
+import tempfile
+
import pytest
from openai import OpenAI
+from reportlab.lib.pagesizes import letter
+from reportlab.pdfgen import canvas

from llama_stack.distribution.library_client import LlamaStackAsLibraryClient

@@ -82,6 +88,14 @@ def skip_if_provider_isnt_vllm(client_with_models, model_id):
        pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support vllm extra_body parameters.")


+def skip_if_provider_isnt_openai(client_with_models, model_id):
+    provider = provider_from_model(client_with_models, model_id)
+    if provider.provider_type != "remote::openai":
+        pytest.skip(
+            f"Model {model_id} hosted by {provider.provider_type} doesn't support chat completion calls with base64 encoded files."
+        )
+
+
@pytest.fixture
def openai_client(client_with_models):
    base_url = f"{client_with_models.base_url}/v1/openai/v1"
@@ -418,3 +432,45 @@ def test_inference_store_tool_calls(compat_client, client_with_models, text_mode
    # failed tool call parses show up as a message with content, so ensure
    # that the retrieve response content matches the original request
    assert retrieved_response.choices[0].message.content == content
+
+
+def test_openai_chat_completion_non_streaming_with_file(openai_client, client_with_models, text_model_id):
+    skip_if_provider_isnt_openai(client_with_models, text_model_id)
+
+    # Generate temporary PDF with "Hello World" text
+    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as temp_pdf:
+        c = canvas.Canvas(temp_pdf.name, pagesize=letter)
+        c.drawString(100, 750, "Hello World")
+        c.save()
+
+        # Read the PDF and encode to base64
+        with open(temp_pdf.name, "rb") as pdf_file:
+            pdf_base64 = base64.b64encode(pdf_file.read()).decode("utf-8")
+
+        # Clean up temporary file
+        os.unlink(temp_pdf.name)
+
+    response = openai_client.chat.completions.create(
+        model=text_model_id,
+        messages=[
+            {
+                "role": "user",
+                "content": "Describe what you see in this PDF file.",
+            },
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "file",
+                        "file": {
+                            "filename": "my-temp-hello-world-pdf",
+                            "file_data": f"data:application/pdf;base64,{pdf_base64}",
+                        },
+                    }
+                ],
+            },
+        ],
+        stream=False,
+    )
+
+    message_content = response.choices[0].message.content.lower().strip()
+    assert "hello world" in message_content


@@ -38,9 +38,8 @@ sys.stdout.reconfigure(line_buffering=True)

# How to run this test:
#
-# pytest llama_stack/providers/tests/post_training/test_post_training.py
-#   -m "torchtune_post_training_huggingface_datasetio"
-#   -v -s --tb=short --disable-warnings
+# LLAMA_STACK_CONFIG=ci-tests uv run --dev pytest tests/integration/post_training/test_post_training.py
+#


class TestPostTraining:
@@ -113,6 +112,7 @@ class TestPostTraining:
                break

            logger.info(f"Current status: {status}")
+            assert status.status in ["scheduled", "in_progress", "completed"]
            if status.status == "completed":
                break


@@ -346,7 +346,7 @@ pip_packages:
    def test_external_provider_from_module_building(self, mock_providers):
        """Test loading an external provider from a module during build (building=True, partial spec)."""
-        from llama_stack.distribution.datatypes import BuildConfig, DistributionSpec, Provider
+        from llama_stack.distribution.datatypes import BuildConfig, BuildProvider, DistributionSpec
        from llama_stack.providers.datatypes import Api

        # No importlib patch needed, should not import module when type of `config` is BuildConfig or DistributionSpec
@@ -358,10 +358,8 @@ pip_packages:
            description="test",
            providers={
                "inference": [
-                    Provider(
-                        provider_id="external_test",
+                    BuildProvider(
                        provider_type="external_test",
-                        config={},
                        module="external_test",
                    )
                ]


@@ -0,0 +1,125 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import os
from unittest.mock import AsyncMock, MagicMock, patch

from llama_stack.distribution.stack import replace_env_vars
from llama_stack.providers.remote.inference.openai.config import OpenAIConfig
from llama_stack.providers.remote.inference.openai.openai import OpenAIInferenceAdapter


class TestOpenAIBaseURLConfig:
    """Test that OPENAI_BASE_URL environment variable properly configures the OpenAI adapter."""

    def test_default_base_url_without_env_var(self):
        """Test that the adapter uses the default OpenAI base URL when no environment variable is set."""
        config = OpenAIConfig(api_key="test-key")
        adapter = OpenAIInferenceAdapter(config)

        assert adapter.get_base_url() == "https://api.openai.com/v1"

    def test_custom_base_url_from_config(self):
        """Test that the adapter uses a custom base URL when provided in config."""
        custom_url = "https://custom.openai.com/v1"
        config = OpenAIConfig(api_key="test-key", base_url=custom_url)
        adapter = OpenAIInferenceAdapter(config)

        assert adapter.get_base_url() == custom_url

    @patch.dict(os.environ, {"OPENAI_BASE_URL": "https://env.openai.com/v1"})
    def test_base_url_from_environment_variable(self):
        """Test that the adapter uses base URL from OPENAI_BASE_URL environment variable."""
        # Use sample_run_config which has proper environment variable syntax
        config_data = OpenAIConfig.sample_run_config(api_key="test-key")
        processed_config = replace_env_vars(config_data)
        config = OpenAIConfig.model_validate(processed_config)
        adapter = OpenAIInferenceAdapter(config)

        assert adapter.get_base_url() == "https://env.openai.com/v1"

    @patch.dict(os.environ, {"OPENAI_BASE_URL": "https://env.openai.com/v1"})
    def test_config_overrides_environment_variable(self):
        """Test that explicit config value overrides environment variable."""
        custom_url = "https://config.openai.com/v1"
        config = OpenAIConfig(api_key="test-key", base_url=custom_url)
        adapter = OpenAIInferenceAdapter(config)

        # Config should take precedence over environment variable
        assert adapter.get_base_url() == custom_url

    @patch("llama_stack.providers.utils.inference.openai_mixin.AsyncOpenAI")
    def test_client_uses_configured_base_url(self, mock_openai_class):
        """Test that the OpenAI client is initialized with the configured base URL."""
        custom_url = "https://test.openai.com/v1"
        config = OpenAIConfig(api_key="test-key", base_url=custom_url)
        adapter = OpenAIInferenceAdapter(config)
        # Mock the get_api_key method since it's delegated to LiteLLMOpenAIMixin
        adapter.get_api_key = MagicMock(return_value="test-key")

        # Access the client property to trigger AsyncOpenAI initialization
        _ = adapter.client

        # Verify AsyncOpenAI was called with the correct base_url
        mock_openai_class.assert_called_once_with(
            api_key="test-key",
            base_url=custom_url,
        )

    @patch("llama_stack.providers.utils.inference.openai_mixin.AsyncOpenAI")
    async def test_check_model_availability_uses_configured_url(self, mock_openai_class):
        """Test that check_model_availability uses the configured base URL."""
        custom_url = "https://test.openai.com/v1"
        config = OpenAIConfig(api_key="test-key", base_url=custom_url)
        adapter = OpenAIInferenceAdapter(config)
        # Mock the get_api_key method
        adapter.get_api_key = MagicMock(return_value="test-key")

        # Mock the AsyncOpenAI client and its models.retrieve method
        mock_client = MagicMock()
        mock_client.models.retrieve = AsyncMock(return_value=MagicMock())
        mock_openai_class.return_value = mock_client

        # Call check_model_availability and verify it returns True
        assert await adapter.check_model_availability("gpt-4")

        # Verify the client was created with the custom URL
        mock_openai_class.assert_called_with(
            api_key="test-key",
            base_url=custom_url,
        )

        # Verify the method was called and returned True
        mock_client.models.retrieve.assert_called_once_with("gpt-4")

    @patch.dict(os.environ, {"OPENAI_BASE_URL": "https://proxy.openai.com/v1"})
    @patch("llama_stack.providers.utils.inference.openai_mixin.AsyncOpenAI")
    async def test_environment_variable_affects_model_availability_check(self, mock_openai_class):
        """Test that setting OPENAI_BASE_URL environment variable affects where model availability is checked."""
        # Use sample_run_config which has proper environment variable syntax
        config_data = OpenAIConfig.sample_run_config(api_key="test-key")
        processed_config = replace_env_vars(config_data)
        config = OpenAIConfig.model_validate(processed_config)
        adapter = OpenAIInferenceAdapter(config)
        # Mock the get_api_key method
        adapter.get_api_key = MagicMock(return_value="test-key")

        # Mock the AsyncOpenAI client
        mock_client = MagicMock()
        mock_client.models.retrieve = AsyncMock(return_value=MagicMock())
        mock_openai_class.return_value = mock_client

        # Call check_model_availability and verify it returns True
        assert await adapter.check_model_availability("gpt-4")

        # Verify the client was created with the environment variable URL
        mock_openai_class.assert_called_with(
            api_key="test-key",
            base_url="https://proxy.openai.com/v1",
        )


@@ -4,13 +4,19 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

+import pytest
+from pydantic import ValidationError
+
from llama_stack.apis.common.content_types import TextContentItem
from llama_stack.apis.inference import (
    CompletionMessage,
    OpenAIAssistantMessageParam,
+    OpenAIChatCompletionContentPartImageParam,
    OpenAIChatCompletionContentPartTextParam,
+    OpenAIDeveloperMessageParam,
+    OpenAIImageURL,
    OpenAISystemMessageParam,
+    OpenAIToolMessageParam,
    OpenAIUserMessageParam,
    SystemMessage,
    UserMessage,
@@ -108,3 +114,71 @@ async def test_openai_messages_to_messages_with_content_list():
    assert llama_messages[0].content[0].text == "system message"
    assert llama_messages[1].content[0].text == "user message"
    assert llama_messages[2].content[0].text == "assistant message"
+
+
+@pytest.mark.parametrize(
+    "message_class,kwargs",
+    [
+        (OpenAISystemMessageParam, {}),
+        (OpenAIAssistantMessageParam, {}),
+        (OpenAIDeveloperMessageParam, {}),
+        (OpenAIUserMessageParam, {}),
+        (OpenAIToolMessageParam, {"tool_call_id": "call_123"}),
+    ],
+)
+def test_message_accepts_text_string(message_class, kwargs):
+    """Test that messages accept string text content."""
+    msg = message_class(content="Test message", **kwargs)
+    assert msg.content == "Test message"
+
+
+@pytest.mark.parametrize(
+    "message_class,kwargs",
+    [
+        (OpenAISystemMessageParam, {}),
+        (OpenAIAssistantMessageParam, {}),
+        (OpenAIDeveloperMessageParam, {}),
+        (OpenAIUserMessageParam, {}),
+        (OpenAIToolMessageParam, {"tool_call_id": "call_123"}),
+    ],
+)
+def test_message_accepts_text_list(message_class, kwargs):
+    """Test that messages accept list of text content parts."""
+    content_list = [OpenAIChatCompletionContentPartTextParam(text="Test message")]
+    msg = message_class(content=content_list, **kwargs)
+    assert len(msg.content) == 1
+    assert msg.content[0].text == "Test message"
+
+
+@pytest.mark.parametrize(
+    "message_class,kwargs",
+    [
+        (OpenAISystemMessageParam, {}),
+        (OpenAIAssistantMessageParam, {}),
+        (OpenAIDeveloperMessageParam, {}),
+        (OpenAIToolMessageParam, {"tool_call_id": "call_123"}),
+    ],
+)
+def test_message_rejects_images(message_class, kwargs):
+    """Test that system, assistant, developer, and tool messages reject image content."""
+    with pytest.raises(ValidationError):
+        message_class(
+            content=[
+                OpenAIChatCompletionContentPartImageParam(image_url=OpenAIImageURL(url="http://example.com/image.jpg"))
+            ],
+            **kwargs,
+        )
+
+
+def test_user_message_accepts_images():
+    """Test that user messages accept image content (unlike other message types)."""
+    # List with images should work
+    msg = OpenAIUserMessageParam(
+        content=[
+            OpenAIChatCompletionContentPartTextParam(text="Describe this image:"),
+            OpenAIChatCompletionContentPartImageParam(image_url=OpenAIImageURL(url="http://example.com/image.jpg")),
+        ]
+    )
+    assert len(msg.content) == 2
+    assert msg.content[0].text == "Describe this image:"
+    assert msg.content[1].image_url.url == "http://example.com/image.jpg"


@@ -162,26 +162,29 @@ async def test_register_model_existing_different(
        await helper.register_model(known_model)


-async def test_unregister_model(helper: ModelRegistryHelper, known_model: Model) -> None:
-    await helper.register_model(known_model)  # duplicate entry
-    assert helper.get_provider_model_id(known_model.model_id) == known_model.provider_model_id
-    await helper.unregister_model(known_model.model_id)
-    assert helper.get_provider_model_id(known_model.model_id) is None
+# TODO: unregister_model functionality was removed/disabled by https://github.com/meta-llama/llama-stack/pull/2916
+# async def test_unregister_model(helper: ModelRegistryHelper, known_model: Model) -> None:
+#     await helper.register_model(known_model)  # duplicate entry
+#     assert helper.get_provider_model_id(known_model.model_id) == known_model.provider_model_id
+#     await helper.unregister_model(known_model.model_id)
+#     assert helper.get_provider_model_id(known_model.model_id) is None


-async def test_unregister_unknown_model(helper: ModelRegistryHelper, unknown_model: Model) -> None:
-    with pytest.raises(ValueError):
-        await helper.unregister_model(unknown_model.model_id)
+# TODO: unregister_model functionality was removed/disabled by https://github.com/meta-llama/llama-stack/pull/2916
+# async def test_unregister_unknown_model(helper: ModelRegistryHelper, unknown_model: Model) -> None:
+#     with pytest.raises(ValueError):
+#         await helper.unregister_model(unknown_model.model_id)


async def test_register_model_during_init(helper: ModelRegistryHelper, known_model: Model) -> None:
    assert helper.get_provider_model_id(known_model.provider_resource_id) == known_model.provider_model_id


-async def test_unregister_model_during_init(helper: ModelRegistryHelper, known_model: Model) -> None:
-    assert helper.get_provider_model_id(known_model.provider_resource_id) == known_model.provider_model_id
-    await helper.unregister_model(known_model.provider_resource_id)
-    assert helper.get_provider_model_id(known_model.provider_resource_id) is None
+# TODO: unregister_model functionality was removed/disabled by https://github.com/meta-llama/llama-stack/pull/2916
+# async def test_unregister_model_during_init(helper: ModelRegistryHelper, known_model: Model) -> None:
+#     assert helper.get_provider_model_id(known_model.provider_resource_id) == known_model.provider_model_id
+#     await helper.unregister_model(known_model.provider_resource_id)
+#     assert helper.get_provider_model_id(known_model.provider_resource_id) is None


async def test_register_model_from_check_model_availability(


@@ -49,7 +49,7 @@ def github_token_app():
    )

    # Add auth middleware
-    app.add_middleware(AuthenticationMiddleware, auth_config=auth_config)
+    app.add_middleware(AuthenticationMiddleware, auth_config=auth_config, impls={})

    @app.get("/test")
    def test_endpoint():
@@ -149,7 +149,7 @@ def test_github_enterprise_support(mock_client_class):
        access_policy=[],
    )

-    app.add_middleware(AuthenticationMiddleware, auth_config=auth_config)
+    app.add_middleware(AuthenticationMiddleware, auth_config=auth_config, impls={})

    @app.get("/test")
    def test_endpoint():

uv.lock (generated): 2668 changed lines; file diff suppressed because it is too large.