Merge branch 'main' into fix/issue-2584-llama4-tool-calling-v2

commit 09f42d9d91
Sumanth Kamenani, 2025-07-28 09:06:49 -04:00, committed by GitHub
183 changed files with 6226 additions and 5684 deletions

@@ -1,3 +0,0 @@
# Ollama external provider for Llama Stack
Template code to create a new external provider for Llama Stack.

@@ -1,7 +0,0 @@
adapter:
adapter_type: custom_ollama
pip_packages: ["ollama", "aiohttp", "tests/external-provider/llama-stack-provider-ollama"]
config_class: llama_stack_provider_ollama.config.OllamaImplConfig
module: llama_stack_provider_ollama
api_dependencies: []
optional_api_dependencies: []
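
The adapter spec above is what Llama Stack discovers under external_providers_dir to wire in an out-of-tree provider. A minimal sketch of how such a YAML could be parsed into an AdapterSpec (field names come from the spec above; this loader is illustrative, not the stack's actual discovery code):

import yaml

from llama_stack.providers.datatypes import AdapterSpec

def load_adapter_spec(path: str) -> AdapterSpec:
    # Illustrative loader; the real discovery logic lives inside Llama Stack.
    with open(path) as f:
        doc = yaml.safe_load(f)
    adapter = doc["adapter"]
    return AdapterSpec(
        adapter_type=adapter["adapter_type"],
        module=adapter["module"],
        pip_packages=adapter.get("pip_packages", []),
        config_class=adapter["config_class"],
    )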

@@ -1,43 +0,0 @@
[project]
dependencies = [
"llama-stack",
"pydantic",
"ollama",
"aiohttp",
"aiosqlite",
"autoevals",
"chardet",
"chromadb-client",
"datasets",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
]
name = "llama-stack-provider-ollama"
version = "0.1.0"
description = "External provider for Ollama using the Llama Stack API"
readme = "README.md"
requires-python = ">=3.12"

@@ -1,124 +0,0 @@
version: 2
image_name: ollama
apis:
- agents
- datasetio
- eval
- inference
- safety
- scoring
- telemetry
- tool_runtime
- vector_io
providers:
inference:
- provider_id: ollama
provider_type: remote::ollama
config:
url: ${env.OLLAMA_URL:=http://localhost:11434}
vector_io:
- provider_id: faiss
provider_type: inline::faiss
config:
metadata_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config: {}
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
agents_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db
responses_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/responses_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: "${env.OTEL_SERVICE_NAME:=\u200b}"
sinks: ${env.TELEMETRY_SINKS:=console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/trace_store.db
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
metadata_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/huggingface_datasetio.db
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config:
metadata_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
metadata_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/localfs_datasetio.db
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:+}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:+}
max_results: 3
- provider_id: rag-runtime
provider_type: inline::rag-runtime
config: {}
- provider_id: wolfram-alpha
provider_type: remote::wolfram-alpha
config:
api_key: ${env.WOLFRAM_ALPHA_API_KEY:+}
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/registry.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: custom_ollama
model_type: llm
- metadata:
embedding_dimension: 384
model_id: all-MiniLM-L6-v2
provider_id: custom_ollama
provider_model_id: all-minilm:l6-v2
model_type: embedding
shields: []
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
- toolgroup_id: builtin::wolfram_alpha
provider_id: wolfram-alpha
server:
port: 8321
external_providers_dir: ~/.llama/providers.d
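
Values such as ${env.OLLAMA_URL:=http://localhost:11434} in the run.yaml above use shell-style expansion: := substitutes the default when the variable is unset, while :+ substitutes its word only when the variable is set. A rough sketch of that substitution rule (the exact semantics are an assumption; the stack's real resolver may differ):

import os
import re

# Sketch of ${env.NAME:=default} / ${env.NAME:+word} expansion; semantics assumed.
_PLACEHOLDER = re.compile(r"\$\{env\.(?P<name>\w+):(?P<op>[=+])(?P<word>[^}]*)\}")

def resolve_env_placeholders(text: str) -> str:
    def repl(match: re.Match) -> str:
        value = os.environ.get(match.group("name"))
        if match.group("op") == "=":
            return value if value is not None else match.group("word")  # := default
        return match.group("word") if value is not None else ""  # :+ conditional
    return _PLACEHOLDER.sub(repl, text)

print(resolve_env_placeholders("url: ${env.OLLAMA_URL:=http://localhost:11434}"))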

@@ -2,8 +2,10 @@ version: '2'
distribution_spec:
description: Custom distro for CI tests
providers:
inference:
- remote::custom_ollama
image_type: container
weather:
- provider_id: kaze
provider_type: remote::kaze
image_type: venv
image_name: ci-test
external_providers_dir: ~/.llama/providers.d
external_apis_dir: ~/.llama/apis.d

tests/external/kaze.yaml (new file, vendored)

@@ -0,0 +1,6 @@
adapter:
adapter_type: kaze
pip_packages: ["tests/external/llama-stack-provider-kaze"]
config_class: llama_stack_provider_kaze.config.KazeProviderConfig
module: llama_stack_provider_kaze
optional_api_dependencies: []

@@ -0,0 +1,15 @@
[project]
name = "llama-stack-api-weather"
version = "0.1.0"
description = "Weather API for Llama Stack"
readme = "README.md"
requires-python = ">=3.10"
dependencies = ["llama-stack", "pydantic"]
[build-system]
requires = ["setuptools"]
build-backend = "setuptools.build_meta"
[tool.setuptools.packages.find]
where = ["src"]
include = ["llama_stack_api_weather", "llama_stack_api_weather.*"]

@@ -0,0 +1,11 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
"""Weather API for Llama Stack."""
from .weather import WeatherProvider, available_providers
__all__ = ["WeatherProvider", "available_providers"]

@@ -0,0 +1,39 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Protocol
from llama_stack.providers.datatypes import AdapterSpec, Api, ProviderSpec, RemoteProviderSpec
from llama_stack.schema_utils import webmethod
def available_providers() -> list[ProviderSpec]:
return [
RemoteProviderSpec(
api=Api.weather,
provider_type="remote::kaze",
config_class="llama_stack_provider_kaze.KazeProviderConfig",
adapter=AdapterSpec(
adapter_type="kaze",
module="llama_stack_provider_kaze",
pip_packages=["llama_stack_provider_kaze"],
config_class="llama_stack_provider_kaze.KazeProviderConfig",
),
),
]
class WeatherProvider(Protocol):
"""
A protocol for the Weather API.
"""
@webmethod(route="/weather/locations", method="GET")
async def get_available_locations() -> dict[str, list[str]]:
"""
Get the available locations.
"""
...
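
Because WeatherProvider is a typing.Protocol, conformance is structural: any object whose get_available_locations matches the signature can be passed where the protocol is expected. A small usage sketch (InMemoryWeather is a made-up stand-in, not part of this commit):

import asyncio
from typing import Protocol

class WeatherProvider(Protocol):
    async def get_available_locations(self) -> dict[str, list[str]]: ...

class InMemoryWeather:
    # Hypothetical provider used only for this example.
    async def get_available_locations(self) -> dict[str, list[str]]:
        return {"locations": ["Berlin"]}

async def main(provider: WeatherProvider) -> None:
    print(await provider.get_available_locations())

asyncio.run(main(InMemoryWeather()))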

@@ -0,0 +1,15 @@
[project]
name = "llama-stack-provider-kaze"
version = "0.1.0"
description = "Kaze weather provider for Llama Stack"
readme = "README.md"
requires-python = ">=3.10"
dependencies = ["llama-stack", "pydantic", "aiohttp"]
[build-system]
requires = ["setuptools"]
build-backend = "setuptools.build_meta"
[tool.setuptools.packages.find]
where = ["src"]
include = ["llama_stack_provider_kaze", "llama_stack_provider_kaze.*"]

@@ -0,0 +1,20 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
"""Kaze weather provider for Llama Stack."""
from .config import KazeProviderConfig
from .kaze import WeatherKazeAdapter
__all__ = ["KazeProviderConfig", "WeatherKazeAdapter"]
async def get_adapter_impl(config: KazeProviderConfig, _deps):
from .kaze import WeatherKazeAdapter
impl = WeatherKazeAdapter(config)
await impl.initialize()
return impl
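
get_adapter_impl is the entry point the stack imports from an external provider package: it receives the parsed config (plus resolved API dependencies) and must return an initialized implementation. Roughly how a host could invoke it, as a sketch rather than the stack's actual resolver:

import asyncio
import importlib

async def build_provider(module_name: str, config, deps):
    # Sketch of a host-side call: import the package, call its hook.
    module = importlib.import_module(module_name)
    return await module.get_adapter_impl(config, deps)

# e.g.: impl = asyncio.run(build_provider("llama_stack_provider_kaze", KazeProviderConfig(), {}))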

@@ -0,0 +1,11 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from pydantic import BaseModel
class KazeProviderConfig(BaseModel):
"""Configuration for the Kaze weather provider."""

@@ -0,0 +1,26 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack_api_weather.weather import WeatherProvider
from .config import KazeProviderConfig
class WeatherKazeAdapter(WeatherProvider):
"""Kaze weather provider implementation."""
def __init__(
self,
config: KazeProviderConfig,
) -> None:
self.config = config
async def initialize(self) -> None:
pass
async def get_available_locations(self) -> dict[str, list[str]]:
"""Get available weather locations."""
return {"locations": ["Paris", "Tokyo"]}

@@ -0,0 +1,14 @@
version: 2
distribution_spec:
description: Use (an external) Ramalama server for running LLM inference
container_image: null
providers:
inference:
- provider_id: ramalama
provider_type: remote::ramalama
module: ramalama_stack==0.3.0a0
image_type: venv
image_name: ramalama-stack-test
additional_pip_packages:
- aiosqlite
- sqlalchemy[asyncio]

tests/external/ramalama-stack/run.yaml (new file, vendored)

@@ -0,0 +1,12 @@
version: 2
image_name: ramalama
apis:
- inference
providers:
inference:
- provider_id: ramalama
provider_type: remote::ramalama
module: ramalama_stack==0.3.0a0
config: {}
server:
port: 8321

tests/external/run-byoa.yaml (new file, vendored)

@@ -0,0 +1,13 @@
version: "2"
image_name: "llama-stack-api-weather"
apis:
- weather
providers:
weather:
- provider_id: kaze
provider_type: remote::kaze
config: {}
external_apis_dir: ~/.llama/apis.d
external_providers_dir: ~/.llama/providers.d
server:
port: 8321

tests/external/weather.yaml (new file, vendored)

@@ -0,0 +1,4 @@
module: llama_stack_api_weather
name: weather
pip_packages: ["tests/external/llama-stack-api-weather"]
protocol: WeatherProvider
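
This four-line spec is what external_apis_dir points at: it names the module defining the WeatherProvider protocol and the pip requirement needed to import it, while kaze.yaml under external_providers_dir supplies the implementation. A small sanity-check sketch of the layout run-byoa.yaml expects (assuming the test setup copies the specs into these directories):

from pathlib import Path

# Hypothetical layout check for the BYO-API test setup described above.
for spec in ("~/.llama/apis.d/weather.yaml", "~/.llama/providers.d/kaze.yaml"):
    path = Path(spec).expanduser()
    print(path, "present" if path.exists() else "missing")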

@@ -179,9 +179,7 @@ def test_openai_completion_prompt_logprobs(llama_stack_client, client_with_model
model=text_model_id,
prompt=prompt,
stream=False,
extra_body={
"prompt_logprobs": prompt_logprobs,
},
prompt_logprobs=prompt_logprobs,
)
assert len(response.choices) > 0
choice = response.choices[0]
@@ -196,9 +194,7 @@ def test_openai_completion_guided_choice(llama_stack_client, client_with_models,
model=text_model_id,
prompt=prompt,
stream=False,
extra_body={
"guided_choice": ["joy", "sadness"],
},
guided_choice=["joy", "sadness"],
)
assert len(response.choices) > 0
choice = response.choices[0]
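
The change above drops the extra_body wrapper in favor of passing vendor-specific parameters as plain keyword arguments, which the llama-stack client accepts directly. With the stock openai client, unknown kwargs are rejected, so the same fields would still travel in extra_body. A hedged side-by-side (names mirror the test; client is any OpenAI-compatible client):

def call_both(client, text_model_id: str, prompt: str):
    # llama-stack client style (per the updated test): flat kwargs (illustrative)
    flat = client.completions.create(
        model=text_model_id, prompt=prompt, stream=False,
        guided_choice=["joy", "sadness"],
    )
    # stock openai client style: vendor params ride in extra_body
    wrapped = client.completions.create(
        model=text_model_id, prompt=prompt, stream=False,
        extra_body={"guided_choice": ["joy", "sadness"]},
    )
    return flat, wrapped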

@@ -20,22 +20,15 @@ logger = logging.getLogger(__name__)
def skip_if_provider_doesnt_support_openai_vector_stores(client_with_models):
vector_io_providers = [p for p in client_with_models.providers.list() if p.api == "vector_io"]
for p in vector_io_providers:
if p.provider_type in ["inline::faiss", "inline::sqlite-vec", "inline::milvus", "inline::chromadb"]:
return
pytest.skip("OpenAI vector stores are not supported by any provider")
def skip_if_provider_doesnt_support_openai_vector_store_files_api(client_with_models):
vector_io_providers = [p for p in client_with_models.providers.list() if p.api == "vector_io"]
for p in vector_io_providers:
if p.provider_type in [
"inline::faiss",
"inline::sqlite-vec",
"inline::milvus",
"inline::chromadb",
"remote::pgvector",
"remote::chromadb",
]:
return
@@ -457,7 +450,6 @@ def test_openai_vector_store_search_with_max_num_results(
def test_openai_vector_store_attach_file(compat_client_with_empty_stores, client_with_models):
"""Test OpenAI vector store attach file."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
skip_if_provider_doesnt_support_openai_vector_store_files_api(client_with_models)
if isinstance(compat_client_with_empty_stores, LlamaStackClient):
pytest.skip("Vector Store Files attach is not yet supported with LlamaStackClient")
@@ -509,7 +501,6 @@ def test_openai_vector_store_attach_files_on_creation(compat_client_with_empty_s
def test_openai_vector_store_attach_files_on_creation(compat_client_with_empty_stores, client_with_models):
"""Test OpenAI vector store attach files on creation."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
skip_if_provider_doesnt_support_openai_vector_store_files_api(client_with_models)
if isinstance(compat_client_with_empty_stores, LlamaStackClient):
pytest.skip("Vector Store Files attach is not yet supported with LlamaStackClient")
@@ -566,7 +557,6 @@ def test_openai_vector_store_list_files(compat_client_with_empty_stores, client_with_models):
def test_openai_vector_store_list_files(compat_client_with_empty_stores, client_with_models):
"""Test OpenAI vector store list files."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
skip_if_provider_doesnt_support_openai_vector_store_files_api(client_with_models)
if isinstance(compat_client_with_empty_stores, LlamaStackClient):
pytest.skip("Vector Store Files list is not yet supported with LlamaStackClient")
@@ -640,7 +630,6 @@ def test_openai_vector_store_retrieve_file_contents(compat_client_with_empty_sto
def test_openai_vector_store_retrieve_file_contents(compat_client_with_empty_stores, client_with_models):
"""Test OpenAI vector store retrieve file contents."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
skip_if_provider_doesnt_support_openai_vector_store_files_api(client_with_models)
if isinstance(compat_client_with_empty_stores, LlamaStackClient):
pytest.skip("Vector Store Files retrieve contents is not yet supported with LlamaStackClient")
@@ -682,7 +671,6 @@ def test_openai_vector_store_delete_file(compat_client_with_empty_stores, client
def test_openai_vector_store_delete_file(compat_client_with_empty_stores, client_with_models):
"""Test OpenAI vector store delete file."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
skip_if_provider_doesnt_support_openai_vector_store_files_api(client_with_models)
if isinstance(compat_client_with_empty_stores, LlamaStackClient):
pytest.skip("Vector Store Files list is not yet supported with LlamaStackClient")
@@ -735,12 +723,9 @@ def test_openai_vector_store_delete_file(compat_client_with_empty_stores, client
assert updated_vector_store.file_counts.in_progress == 0
# TODO: Remove this xfail once we have a way to remove embeddings from vector store
@pytest.mark.xfail(reason="Vector Store Files delete doesn't remove embeddings from vector store", strict=True)
def test_openai_vector_store_delete_file_removes_from_vector_store(compat_client_with_empty_stores, client_with_models):
"""Test OpenAI vector store delete file removes from vector store."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
skip_if_provider_doesnt_support_openai_vector_store_files_api(client_with_models)
if isinstance(compat_client_with_empty_stores, LlamaStackClient):
pytest.skip("Vector Store Files attach is not yet supported with LlamaStackClient")
@@ -782,7 +767,6 @@ def test_openai_vector_store_delete_file_removes_from_vector_store(compat_client
def test_openai_vector_store_update_file(compat_client_with_empty_stores, client_with_models):
"""Test OpenAI vector store update file."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
skip_if_provider_doesnt_support_openai_vector_store_files_api(client_with_models)
if isinstance(compat_client_with_empty_stores, LlamaStackClient):
pytest.skip("Vector Store Files update is not yet supported with LlamaStackClient")
@@ -831,7 +815,6 @@ def test_create_vector_store_files_duplicate_vector_store_name(compat_client_wit
This test confirms that client.vector_stores.create() creates a unique ID
"""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
skip_if_provider_doesnt_support_openai_vector_store_files_api(client_with_models)
if isinstance(compat_client_with_empty_stores, LlamaStackClient):
pytest.skip("Vector Store Files create is not yet supported with LlamaStackClient")

@@ -15,6 +15,7 @@ from llama_stack.apis.models import Model, ModelType
from llama_stack.apis.shields.shields import Shield
from llama_stack.apis.tools import ListToolDefsResponse, ToolDef, ToolGroup, ToolParameter
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.distribution.datatypes import RegistryEntrySource
from llama_stack.distribution.routing_tables.benchmarks import BenchmarksRoutingTable
from llama_stack.distribution.routing_tables.datasets import DatasetsRoutingTable
from llama_stack.distribution.routing_tables.models import ModelsRoutingTable
@@ -45,6 +46,30 @@ class InferenceImpl(Impl):
async def unregister_model(self, model_id: str):
return model_id
async def should_refresh_models(self):
return False
async def list_models(self):
return [
Model(
identifier="provider-model-1",
provider_resource_id="provider-model-1",
provider_id="test_provider",
metadata={},
model_type=ModelType.llm,
),
Model(
identifier="provider-model-2",
provider_resource_id="provider-model-2",
provider_id="test_provider",
metadata={"embedding_dimension": 512},
model_type=ModelType.embedding,
),
]
async def shutdown(self):
pass
class SafetyImpl(Impl):
def __init__(self):
@@ -378,3 +403,170 @@ async def test_models_fallback_lookup_behavior(cached_disk_dist_registry):
raise AssertionError("Should have raised ValueError for non-existent model")
except ValueError as e:
assert "not found" in str(e)
async def test_models_source_tracking_default(cached_disk_dist_registry):
"""Test that models registered via register_model get default source."""
table = ModelsRoutingTable({"test_provider": InferenceImpl()}, cached_disk_dist_registry, {})
await table.initialize()
# Register model via register_model (should get default source)
await table.register_model(model_id="user-model", provider_id="test_provider")
models = await table.list_models()
assert len(models.data) == 1
model = models.data[0]
assert model.source == RegistryEntrySource.via_register_api
assert model.identifier == "test_provider/user-model"
# Cleanup
await table.shutdown()
async def test_models_source_tracking_provider(cached_disk_dist_registry):
"""Test that models registered via update_registered_models get provider source."""
table = ModelsRoutingTable({"test_provider": InferenceImpl()}, cached_disk_dist_registry, {})
await table.initialize()
# Simulate provider refresh by calling update_registered_models
provider_models = [
Model(
identifier="provider-model-1",
provider_resource_id="provider-model-1",
provider_id="test_provider",
metadata={},
model_type=ModelType.llm,
),
Model(
identifier="provider-model-2",
provider_resource_id="provider-model-2",
provider_id="test_provider",
metadata={"embedding_dimension": 512},
model_type=ModelType.embedding,
),
]
await table.update_registered_models("test_provider", provider_models)
models = await table.list_models()
assert len(models.data) == 2
# All models should have provider source
for model in models.data:
assert model.source == RegistryEntrySource.listed_from_provider
assert model.provider_id == "test_provider"
# Cleanup
await table.shutdown()
async def test_models_source_interaction_preserves_default(cached_disk_dist_registry):
"""Test that provider refresh preserves user-registered models with default source."""
table = ModelsRoutingTable({"test_provider": InferenceImpl()}, cached_disk_dist_registry, {})
await table.initialize()
# First register a user model with same provider_resource_id as provider will later provide
await table.register_model(
model_id="my-custom-alias", provider_model_id="provider-model-1", provider_id="test_provider"
)
# Verify user model is registered with default source
models = await table.list_models()
assert len(models.data) == 1
user_model = models.data[0]
assert user_model.source == RegistryEntrySource.via_register_api
assert user_model.identifier == "my-custom-alias"
assert user_model.provider_resource_id == "provider-model-1"
# Now simulate provider refresh
provider_models = [
Model(
identifier="provider-model-1",
provider_resource_id="provider-model-1",
provider_id="test_provider",
metadata={},
model_type=ModelType.llm,
),
Model(
identifier="different-model",
provider_resource_id="different-model",
provider_id="test_provider",
metadata={},
model_type=ModelType.llm,
),
]
await table.update_registered_models("test_provider", provider_models)
# Verify user model with alias is preserved, but provider added new model
models = await table.list_models()
assert len(models.data) == 2
# Find the user model and provider model
user_model = next((m for m in models.data if m.identifier == "my-custom-alias"), None)
provider_model = next((m for m in models.data if m.identifier == "test_provider/different-model"), None)
assert user_model is not None
assert user_model.source == RegistryEntrySource.via_register_api
assert user_model.provider_resource_id == "provider-model-1"
assert provider_model is not None
assert provider_model.source == RegistryEntrySource.listed_from_provider
assert provider_model.provider_resource_id == "different-model"
# Cleanup
await table.shutdown()
async def test_models_source_interaction_cleanup_provider_models(cached_disk_dist_registry):
"""Test that provider refresh removes old provider models but keeps default ones."""
table = ModelsRoutingTable({"test_provider": InferenceImpl()}, cached_disk_dist_registry, {})
await table.initialize()
# Register a user model
await table.register_model(model_id="user-model", provider_id="test_provider")
# Add some provider models
provider_models_v1 = [
Model(
identifier="provider-model-old",
provider_resource_id="provider-model-old",
provider_id="test_provider",
metadata={},
model_type=ModelType.llm,
),
]
await table.update_registered_models("test_provider", provider_models_v1)
# Verify we have both user and provider models
models = await table.list_models()
assert len(models.data) == 2
# Now update with new provider models (should remove old provider models)
provider_models_v2 = [
Model(
identifier="provider-model-new",
provider_resource_id="provider-model-new",
provider_id="test_provider",
metadata={},
model_type=ModelType.llm,
),
]
await table.update_registered_models("test_provider", provider_models_v2)
# Should have user model + new provider model, old provider model gone
models = await table.list_models()
assert len(models.data) == 2
identifiers = {m.identifier for m in models.data}
assert "test_provider/user-model" in identifiers # User model preserved
assert "test_provider/provider-model-new" in identifiers # New provider model (uses provider's identifier)
assert "test_provider/provider-model-old" not in identifiers # Old provider model removed
# Verify sources are correct
user_model = next((m for m in models.data if m.identifier == "test_provider/user-model"), None)
provider_model = next((m for m in models.data if m.identifier == "test_provider/provider-model-new"), None)
assert user_model.source == RegistryEntrySource.via_register_api
assert provider_model.source == RegistryEntrySource.listed_from_provider
# Cleanup
await table.shutdown()
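
Taken together, these tests pin down a merge rule: entries created by register_model carry source via_register_api and survive refreshes, while update_registered_models replaces the provider-sourced set wholesale. A toy model of that invariant (not the real routing table, just the behavior the tests assert):

def refresh(existing: dict[str, str], provider_model_ids: list[str]) -> dict[str, str]:
    # Toy model of the invariant, not the real implementation: existing maps
    # identifier -> source; a refresh keeps user-registered rows and swaps in
    # the provider's current listing.
    kept = {k: v for k, v in existing.items() if v == "via_register_api"}
    kept.update({f"test_provider/{m}": "listed_from_provider" for m in provider_model_ids})
    return kept

registry = {
    "test_provider/user-model": "via_register_api",
    "test_provider/provider-model-old": "listed_from_provider",
}
print(refresh(registry, ["provider-model-new"]))
# user-model kept; provider-model-old dropped; provider-model-new added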

@@ -106,6 +106,40 @@ def api_directories(tmp_path):
return remote_inference_dir, inline_inference_dir
def make_import_module_side_effect(
builtin_provider_spec=None,
external_module=None,
raise_for_external=False,
missing_get_provider_spec=False,
):
from types import SimpleNamespace
def import_module_side_effect(name):
if name == "llama_stack.providers.registry.inference":
mock_builtin = SimpleNamespace(
available_providers=lambda: [
builtin_provider_spec
or ProviderSpec(
api=Api.inference,
provider_type="test_provider",
config_class="test_provider.config.TestProviderConfig",
module="test_provider",
)
]
)
return mock_builtin
elif name == "external_test.provider":
if raise_for_external:
raise ModuleNotFoundError(name)
if missing_get_provider_spec:
return SimpleNamespace()
return external_module
else:
raise ModuleNotFoundError(name)
return import_module_side_effect
class TestProviderRegistry:
"""Test suite for provider registry functionality."""
@@ -221,3 +255,124 @@ pip_packages:
with pytest.raises(KeyError) as exc_info:
get_provider_registry(base_config)
assert "config_class" in str(exc_info.value)
def test_external_provider_from_module_success(self, mock_providers):
"""Test loading an external provider from a module (success path)."""
from types import SimpleNamespace
from llama_stack.distribution.datatypes import Provider, StackRunConfig
from llama_stack.providers.datatypes import Api, ProviderSpec
# Simulate a provider module with get_provider_spec
fake_spec = ProviderSpec(
api=Api.inference,
provider_type="external_test",
config_class="external_test.config.ExternalTestConfig",
module="external_test",
)
fake_module = SimpleNamespace(get_provider_spec=lambda: fake_spec)
import_module_side_effect = make_import_module_side_effect(external_module=fake_module)
with patch("importlib.import_module", side_effect=import_module_side_effect) as mock_import:
config = StackRunConfig(
image_name="test_image",
providers={
"inference": [
Provider(
provider_id="external_test",
provider_type="external_test",
config={},
module="external_test",
)
]
},
)
registry = get_provider_registry(config)
assert Api.inference in registry
assert "external_test" in registry[Api.inference]
provider = registry[Api.inference]["external_test"]
assert provider.module == "external_test"
assert provider.config_class == "external_test.config.ExternalTestConfig"
mock_import.assert_any_call("llama_stack.providers.registry.inference")
mock_import.assert_any_call("external_test.provider")
def test_external_provider_from_module_not_found(self, mock_providers):
"""Test handling ModuleNotFoundError for missing provider module."""
from llama_stack.distribution.datatypes import Provider, StackRunConfig
import_module_side_effect = make_import_module_side_effect(raise_for_external=True)
with patch("importlib.import_module", side_effect=import_module_side_effect):
config = StackRunConfig(
image_name="test_image",
providers={
"inference": [
Provider(
provider_id="external_test",
provider_type="external_test",
config={},
module="external_test",
)
]
},
)
with pytest.raises(ValueError) as exc_info:
get_provider_registry(config)
assert "get_provider_spec not found" in str(exc_info.value)
def test_external_provider_from_module_missing_get_provider_spec(self, mock_providers):
"""Test handling missing get_provider_spec in provider module (should raise ValueError)."""
from llama_stack.distribution.datatypes import Provider, StackRunConfig
import_module_side_effect = make_import_module_side_effect(missing_get_provider_spec=True)
with patch("importlib.import_module", side_effect=import_module_side_effect):
config = StackRunConfig(
image_name="test_image",
providers={
"inference": [
Provider(
provider_id="external_test",
provider_type="external_test",
config={},
module="external_test",
)
]
},
)
with pytest.raises(AttributeError):
get_provider_registry(config)
def test_external_provider_from_module_building(self, mock_providers):
"""Test loading an external provider from a module during build (building=True, partial spec)."""
from llama_stack.distribution.datatypes import BuildConfig, DistributionSpec, Provider
from llama_stack.providers.datatypes import Api
# No importlib patch needed, should not import module when type of `config` is BuildConfig or DistributionSpec
build_config = BuildConfig(
version=2,
image_type="container",
image_name="test_image",
distribution_spec=DistributionSpec(
description="test",
providers={
"inference": [
Provider(
provider_id="external_test",
provider_type="external_test",
config={},
module="external_test",
)
]
},
),
)
registry = get_provider_registry(build_config)
assert Api.inference in registry
assert "external_test" in registry[Api.inference]
provider = registry[Api.inference]["external_test"]
assert provider.module == "external_test"
assert provider.is_external is True
# config_class is empty string in partial spec
assert provider.config_class == ""
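
These tests fix the contract for module-based external providers: the stack imports <module>.provider and calls its get_provider_spec(). A minimal sketch of what such a module would export (field values copied from the fake spec in the success-path test):

# external_test/provider.py -- illustrative external provider module
from llama_stack.providers.datatypes import Api, ProviderSpec

def get_provider_spec() -> ProviderSpec:
    return ProviderSpec(
        api=Api.inference,
        provider_type="external_test",
        config_class="external_test.config.ExternalTestConfig",
        module="external_test",
    )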

@@ -5,321 +5,353 @@
# the root directory of this source tree.
import os
import unittest
from typing import Any
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from llama_stack.apis.inference import CompletionMessage, UserMessage
from llama_stack.apis.resource import ResourceType
from llama_stack.apis.safety import RunShieldResponse, ViolationLevel
from llama_stack.apis.shields import Shield
from llama_stack.models.llama.datatypes import StopReason
from llama_stack.providers.remote.safety.nvidia.config import NVIDIASafetyConfig
from llama_stack.providers.remote.safety.nvidia.nvidia import NVIDIASafetyAdapter
class TestNVIDIASafetyAdapter(unittest.TestCase):
def setUp(self):
os.environ["NVIDIA_GUARDRAILS_URL"] = "http://nemo.test"
class TestNVIDIASafetyAdapter(NVIDIASafetyAdapter):
"""Test implementation that provides the required shield_store."""
# Initialize the adapter
self.config = NVIDIASafetyConfig(
guardrails_service_url=os.environ["NVIDIA_GUARDRAILS_URL"],
)
self.adapter = NVIDIASafetyAdapter(config=self.config)
self.shield_store = AsyncMock()
self.adapter.shield_store = self.shield_store
def __init__(self, config: NVIDIASafetyConfig, shield_store):
super().__init__(config)
self.shield_store = shield_store
# Mock the HTTP request methods
self.guardrails_post_patcher = patch(
"llama_stack.providers.remote.safety.nvidia.nvidia.NeMoGuardrails._guardrails_post"
)
self.mock_guardrails_post = self.guardrails_post_patcher.start()
self.mock_guardrails_post.return_value = {"status": "allowed"}
def tearDown(self):
"""Clean up after each test."""
self.guardrails_post_patcher.stop()
@pytest.fixture
def nvidia_adapter():
"""Set up the NVIDIASafetyAdapter for testing."""
os.environ["NVIDIA_GUARDRAILS_URL"] = "http://nemo.test"
@pytest.fixture(autouse=True)
def inject_fixtures(self, run_async):
self.run_async = run_async
# Initialize the adapter
config = NVIDIASafetyConfig(
guardrails_service_url=os.environ["NVIDIA_GUARDRAILS_URL"],
)
def _assert_request(
self,
mock_call: MagicMock,
expected_url: str,
expected_headers: dict[str, str] | None = None,
expected_json: dict[str, Any] | None = None,
) -> None:
"""
Helper method to verify request details in mock API calls.
# Create a mock shield store that implements the ShieldStore protocol
shield_store = AsyncMock()
shield_store.get_shield = AsyncMock()
Args:
mock_call: The MagicMock object that was called
expected_url: The expected URL to which the request was made
expected_headers: Optional dictionary of expected request headers
expected_json: Optional dictionary of expected JSON payload
"""
call_args = mock_call.call_args
adapter = TestNVIDIASafetyAdapter(config=config, shield_store=shield_store)
# Check URL
assert call_args[0][0] == expected_url
return adapter
# Check headers if provided
if expected_headers:
for key, value in expected_headers.items():
assert call_args[1]["headers"][key] == value
# Check JSON if provided
if expected_json:
for key, value in expected_json.items():
if isinstance(value, dict):
for nested_key, nested_value in value.items():
assert call_args[1]["json"][key][nested_key] == nested_value
else:
assert call_args[1]["json"][key] == value
@pytest.fixture
def mock_guardrails_post():
"""Mock the HTTP request methods."""
with patch("llama_stack.providers.remote.safety.nvidia.nvidia.NeMoGuardrails._guardrails_post") as mock_post:
mock_post.return_value = {"status": "allowed"}
yield mock_post
def test_register_shield_with_valid_id(self):
shield = Shield(
provider_id="nvidia",
type="shield",
identifier="test-shield",
provider_resource_id="test-model",
)
# Register the shield
self.run_async(self.adapter.register_shield(shield))
def _assert_request(
mock_call: MagicMock,
expected_url: str,
expected_headers: dict[str, str] | None = None,
expected_json: dict[str, Any] | None = None,
) -> None:
"""
Helper method to verify request details in mock API calls.
def test_register_shield_without_id(self):
shield = Shield(
provider_id="nvidia",
type="shield",
identifier="test-shield",
provider_resource_id="",
)
Args:
mock_call: The MagicMock object that was called
expected_url: The expected URL to which the request was made
expected_headers: Optional dictionary of expected request headers
expected_json: Optional dictionary of expected JSON payload
"""
call_args = mock_call.call_args
# Register the shield should raise a ValueError
with self.assertRaises(ValueError):
self.run_async(self.adapter.register_shield(shield))
# Check URL
assert call_args[0][0] == expected_url
def test_run_shield_allowed(self):
# Set up the shield
shield_id = "test-shield"
shield = Shield(
provider_id="nvidia",
type="shield",
identifier=shield_id,
provider_resource_id="test-model",
)
self.shield_store.get_shield.return_value = shield
# Check headers if provided
if expected_headers:
for key, value in expected_headers.items():
assert call_args[1]["headers"][key] == value
# Mock Guardrails API response
self.mock_guardrails_post.return_value = {"status": "allowed"}
# Check JSON if provided
if expected_json:
for key, value in expected_json.items():
if isinstance(value, dict):
for nested_key, nested_value in value.items():
assert call_args[1]["json"][key][nested_key] == nested_value
else:
assert call_args[1]["json"][key] == value
# Run the shield
messages = [
UserMessage(role="user", content="Hello, how are you?"),
CompletionMessage(
role="assistant",
content="I'm doing well, thank you for asking!",
stop_reason="end_of_message",
tool_calls=[],
),
]
result = self.run_async(self.adapter.run_shield(shield_id, messages))
# Verify the shield store was called
self.shield_store.get_shield.assert_called_once_with(shield_id)
async def test_register_shield_with_valid_id(nvidia_adapter):
adapter = nvidia_adapter
# Verify the Guardrails API was called correctly
self.mock_guardrails_post.assert_called_once_with(
path="/v1/guardrail/checks",
data={
"model": shield_id,
"messages": [
{"role": "user", "content": "Hello, how are you?"},
{"role": "assistant", "content": "I'm doing well, thank you for asking!"},
],
"temperature": 1.0,
"top_p": 1,
"frequency_penalty": 0,
"presence_penalty": 0,
"max_tokens": 160,
"stream": False,
"guardrails": {
"config_id": "self-check",
},
shield = Shield(
provider_id="nvidia",
type=ResourceType.shield,
identifier="test-shield",
provider_resource_id="test-model",
)
# Register the shield
await adapter.register_shield(shield)
async def test_register_shield_without_id(nvidia_adapter):
adapter = nvidia_adapter
shield = Shield(
provider_id="nvidia",
type=ResourceType.shield,
identifier="test-shield",
provider_resource_id="",
)
# Register the shield should raise a ValueError
with pytest.raises(ValueError):
await adapter.register_shield(shield)
async def test_run_shield_allowed(nvidia_adapter, mock_guardrails_post):
adapter = nvidia_adapter
# Set up the shield
shield_id = "test-shield"
shield = Shield(
provider_id="nvidia",
type=ResourceType.shield,
identifier=shield_id,
provider_resource_id="test-model",
)
adapter.shield_store.get_shield.return_value = shield
# Mock Guardrails API response
mock_guardrails_post.return_value = {"status": "allowed"}
# Run the shield
messages = [
UserMessage(role="user", content="Hello, how are you?"),
CompletionMessage(
role="assistant",
content="I'm doing well, thank you for asking!",
stop_reason=StopReason.end_of_message,
tool_calls=[],
),
]
result = await adapter.run_shield(shield_id, messages)
# Verify the shield store was called
adapter.shield_store.get_shield.assert_called_once_with(shield_id)
# Verify the Guardrails API was called correctly
mock_guardrails_post.assert_called_once_with(
path="/v1/guardrail/checks",
data={
"model": shield_id,
"messages": [
{"role": "user", "content": "Hello, how are you?"},
{"role": "assistant", "content": "I'm doing well, thank you for asking!"},
],
"temperature": 1.0,
"top_p": 1,
"frequency_penalty": 0,
"presence_penalty": 0,
"max_tokens": 160,
"stream": False,
"guardrails": {
"config_id": "self-check",
},
)
},
)
# Verify the result
assert isinstance(result, RunShieldResponse)
assert result.violation is None
# Verify the result
assert isinstance(result, RunShieldResponse)
assert result.violation is None
def test_run_shield_blocked(self):
# Set up the shield
shield_id = "test-shield"
shield = Shield(
provider_id="nvidia",
type="shield",
identifier=shield_id,
provider_resource_id="test-model",
)
self.shield_store.get_shield.return_value = shield
# Mock Guardrails API response
self.mock_guardrails_post.return_value = {"status": "blocked", "rails_status": {"reason": "harmful_content"}}
async def test_run_shield_blocked(nvidia_adapter, mock_guardrails_post):
adapter = nvidia_adapter
# Run the shield
messages = [
UserMessage(role="user", content="Hello, how are you?"),
CompletionMessage(
role="assistant",
content="I'm doing well, thank you for asking!",
stop_reason="end_of_message",
tool_calls=[],
),
]
result = self.run_async(self.adapter.run_shield(shield_id, messages))
# Set up the shield
shield_id = "test-shield"
shield = Shield(
provider_id="nvidia",
type=ResourceType.shield,
identifier=shield_id,
provider_resource_id="test-model",
)
adapter.shield_store.get_shield.return_value = shield
# Verify the shield store was called
self.shield_store.get_shield.assert_called_once_with(shield_id)
# Mock Guardrails API response
mock_guardrails_post.return_value = {"status": "blocked", "rails_status": {"reason": "harmful_content"}}
# Verify the Guardrails API was called correctly
self.mock_guardrails_post.assert_called_once_with(
path="/v1/guardrail/checks",
data={
"model": shield_id,
"messages": [
{"role": "user", "content": "Hello, how are you?"},
{"role": "assistant", "content": "I'm doing well, thank you for asking!"},
],
"temperature": 1.0,
"top_p": 1,
"frequency_penalty": 0,
"presence_penalty": 0,
"max_tokens": 160,
"stream": False,
"guardrails": {
"config_id": "self-check",
},
# Run the shield
messages = [
UserMessage(role="user", content="Hello, how are you?"),
CompletionMessage(
role="assistant",
content="I'm doing well, thank you for asking!",
stop_reason=StopReason.end_of_message,
tool_calls=[],
),
]
result = await adapter.run_shield(shield_id, messages)
# Verify the shield store was called
adapter.shield_store.get_shield.assert_called_once_with(shield_id)
# Verify the Guardrails API was called correctly
mock_guardrails_post.assert_called_once_with(
path="/v1/guardrail/checks",
data={
"model": shield_id,
"messages": [
{"role": "user", "content": "Hello, how are you?"},
{"role": "assistant", "content": "I'm doing well, thank you for asking!"},
],
"temperature": 1.0,
"top_p": 1,
"frequency_penalty": 0,
"presence_penalty": 0,
"max_tokens": 160,
"stream": False,
"guardrails": {
"config_id": "self-check",
},
)
},
)
# Verify the result
assert result.violation is not None
assert isinstance(result, RunShieldResponse)
assert result.violation.user_message == "Sorry I cannot do this."
assert result.violation.violation_level == ViolationLevel.ERROR
assert result.violation.metadata == {"reason": "harmful_content"}
# Verify the result
assert result.violation is not None
assert isinstance(result, RunShieldResponse)
assert result.violation.user_message == "Sorry I cannot do this."
assert result.violation.violation_level == ViolationLevel.ERROR
assert result.violation.metadata == {"reason": "harmful_content"}
def test_run_shield_not_found(self):
# Set up shield store to return None
shield_id = "non-existent-shield"
self.shield_store.get_shield.return_value = None
messages = [
UserMessage(role="user", content="Hello, how are you?"),
]
async def test_run_shield_not_found(nvidia_adapter, mock_guardrails_post):
adapter = nvidia_adapter
with self.assertRaises(ValueError):
self.run_async(self.adapter.run_shield(shield_id, messages))
# Set up shield store to return None
shield_id = "non-existent-shield"
adapter.shield_store.get_shield.return_value = None
# Verify the shield store was called
self.shield_store.get_shield.assert_called_once_with(shield_id)
messages = [
UserMessage(role="user", content="Hello, how are you?"),
]
# Verify the Guardrails API was not called
self.mock_guardrails_post.assert_not_called()
with pytest.raises(ValueError):
await adapter.run_shield(shield_id, messages)
def test_run_shield_http_error(self):
shield_id = "test-shield"
shield = Shield(
provider_id="nvidia",
type="shield",
identifier=shield_id,
provider_resource_id="test-model",
)
self.shield_store.get_shield.return_value = shield
# Verify the shield store was called
adapter.shield_store.get_shield.assert_called_once_with(shield_id)
# Mock Guardrails API to raise an exception
error_msg = "API Error: 500 Internal Server Error"
self.mock_guardrails_post.side_effect = Exception(error_msg)
# Verify the Guardrails API was not called
mock_guardrails_post.assert_not_called()
# Running the shield should raise an exception
messages = [
UserMessage(role="user", content="Hello, how are you?"),
CompletionMessage(
role="assistant",
content="I'm doing well, thank you for asking!",
stop_reason="end_of_message",
tool_calls=[],
),
]
with self.assertRaises(Exception) as context:
self.run_async(self.adapter.run_shield(shield_id, messages))
# Verify the shield store was called
self.shield_store.get_shield.assert_called_once_with(shield_id)
async def test_run_shield_http_error(nvidia_adapter, mock_guardrails_post):
adapter = nvidia_adapter
# Verify the Guardrails API was called correctly
self.mock_guardrails_post.assert_called_once_with(
path="/v1/guardrail/checks",
data={
"model": shield_id,
"messages": [
{"role": "user", "content": "Hello, how are you?"},
{"role": "assistant", "content": "I'm doing well, thank you for asking!"},
],
"temperature": 1.0,
"top_p": 1,
"frequency_penalty": 0,
"presence_penalty": 0,
"max_tokens": 160,
"stream": False,
"guardrails": {
"config_id": "self-check",
},
shield_id = "test-shield"
shield = Shield(
provider_id="nvidia",
type=ResourceType.shield,
identifier=shield_id,
provider_resource_id="test-model",
)
adapter.shield_store.get_shield.return_value = shield
# Mock Guardrails API to raise an exception
error_msg = "API Error: 500 Internal Server Error"
mock_guardrails_post.side_effect = Exception(error_msg)
# Running the shield should raise an exception
messages = [
UserMessage(role="user", content="Hello, how are you?"),
CompletionMessage(
role="assistant",
content="I'm doing well, thank you for asking!",
stop_reason=StopReason.end_of_message,
tool_calls=[],
),
]
with pytest.raises(Exception) as exc_info:
await adapter.run_shield(shield_id, messages)
# Verify the shield store was called
adapter.shield_store.get_shield.assert_called_once_with(shield_id)
# Verify the Guardrails API was called correctly
mock_guardrails_post.assert_called_once_with(
path="/v1/guardrail/checks",
data={
"model": shield_id,
"messages": [
{"role": "user", "content": "Hello, how are you?"},
{"role": "assistant", "content": "I'm doing well, thank you for asking!"},
],
"temperature": 1.0,
"top_p": 1,
"frequency_penalty": 0,
"presence_penalty": 0,
"max_tokens": 160,
"stream": False,
"guardrails": {
"config_id": "self-check",
},
)
# Verify the exception message
assert error_msg in str(context.exception)
},
)
# Verify the exception message
assert error_msg in str(exc_info.value)
def test_init_nemo_guardrails(self):
from llama_stack.providers.remote.safety.nvidia.nvidia import NeMoGuardrails
test_config_id = "test-custom-config-id"
config = NVIDIASafetyConfig(
guardrails_service_url=os.environ["NVIDIA_GUARDRAILS_URL"],
config_id=test_config_id,
)
# Initialize with default parameters
test_model = "test-model"
guardrails = NeMoGuardrails(config, test_model)
def test_init_nemo_guardrails():
from llama_stack.providers.remote.safety.nvidia.nvidia import NeMoGuardrails
# Verify the attributes are set correctly
assert guardrails.config_id == test_config_id
assert guardrails.model == test_model
assert guardrails.threshold == 0.9 # Default value
assert guardrails.temperature == 1.0 # Default value
assert guardrails.guardrails_service_url == os.environ["NVIDIA_GUARDRAILS_URL"]
os.environ["NVIDIA_GUARDRAILS_URL"] = "http://nemo.test"
# Initialize with custom parameters
guardrails = NeMoGuardrails(config, test_model, threshold=0.8, temperature=0.7)
test_config_id = "test-custom-config-id"
config = NVIDIASafetyConfig(
guardrails_service_url=os.environ["NVIDIA_GUARDRAILS_URL"],
config_id=test_config_id,
)
# Initialize with default parameters
test_model = "test-model"
guardrails = NeMoGuardrails(config, test_model)
# Verify the attributes are set correctly
assert guardrails.config_id == test_config_id
assert guardrails.model == test_model
assert guardrails.threshold == 0.8
assert guardrails.temperature == 0.7
assert guardrails.guardrails_service_url == os.environ["NVIDIA_GUARDRAILS_URL"]
# Verify the attributes are set correctly
assert guardrails.config_id == test_config_id
assert guardrails.model == test_model
assert guardrails.threshold == 0.9 # Default value
assert guardrails.temperature == 1.0 # Default value
assert guardrails.guardrails_service_url == os.environ["NVIDIA_GUARDRAILS_URL"]
def test_init_nemo_guardrails_invalid_temperature(self):
from llama_stack.providers.remote.safety.nvidia.nvidia import NeMoGuardrails
# Initialize with custom parameters
guardrails = NeMoGuardrails(config, test_model, threshold=0.8, temperature=0.7)
config = NVIDIASafetyConfig(
guardrails_service_url=os.environ["NVIDIA_GUARDRAILS_URL"],
config_id="test-custom-config-id",
)
with self.assertRaises(ValueError):
NeMoGuardrails(config, "test-model", temperature=0)
# Verify the attributes are set correctly
assert guardrails.config_id == test_config_id
assert guardrails.model == test_model
assert guardrails.threshold == 0.8
assert guardrails.temperature == 0.7
assert guardrails.guardrails_service_url == os.environ["NVIDIA_GUARDRAILS_URL"]
def test_init_nemo_guardrails_invalid_temperature():
from llama_stack.providers.remote.safety.nvidia.nvidia import NeMoGuardrails
os.environ["NVIDIA_GUARDRAILS_URL"] = "http://nemo.test"
config = NVIDIASafetyConfig(
guardrails_service_url=os.environ["NVIDIA_GUARDRAILS_URL"],
config_id="test-custom-config-id",
)
with pytest.raises(ValueError):
NeMoGuardrails(config, "test-model", temperature=0)
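
The rewrite replaces unittest.TestCase plus a run_async helper with bare async def tests, which only run if an async pytest plugin picks them up. One way a module could opt in explicitly (a sketch; the repo may instead enable asyncio_mode = "auto" globally in its pytest config, which is not shown in this diff):

import pytest

# Module-level marker (sketch): pytest-asyncio will then drive every bare
# `async def test_*` function in this file.
pytestmark = pytest.mark.asyncio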

@@ -8,6 +8,7 @@ import random
import numpy as np
import pytest
from chromadb import PersistentClient
from pymilvus import MilvusClient, connections
from llama_stack.apis.vector_dbs import VectorDB
@@ -18,7 +19,7 @@ from llama_stack.providers.inline.vector_io.faiss.faiss import FaissIndex, Faiss
from llama_stack.providers.inline.vector_io.milvus.config import MilvusVectorIOConfig, SqliteKVStoreConfig
from llama_stack.providers.inline.vector_io.sqlite_vec import SQLiteVectorIOConfig
from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import SQLiteVecIndex, SQLiteVecVectorIOAdapter
from llama_stack.providers.remote.vector_io.chroma.chroma import ChromaIndex, ChromaVectorIOAdapter
from llama_stack.providers.remote.vector_io.chroma.chroma import ChromaIndex, ChromaVectorIOAdapter, maybe_await
from llama_stack.providers.remote.vector_io.milvus.milvus import MilvusIndex, MilvusVectorIOAdapter
EMBEDDING_DIMENSION = 384
@@ -26,6 +27,11 @@ COLLECTION_PREFIX = "test_collection"
MILVUS_ALIAS = "test_milvus"
@pytest.fixture(params=["milvus", "sqlite_vec", "faiss", "chroma"])
def vector_provider(request):
return request.param
@pytest.fixture
def vector_db_id() -> str:
return f"test-vector-db-{random.randint(1, 100)}"
@@ -94,11 +100,6 @@ def sample_embeddings_with_metadata(sample_chunks_with_metadata):
return np.array([np.random.rand(EMBEDDING_DIMENSION).astype(np.float32) for _ in sample_chunks_with_metadata])
@pytest.fixture(params=["milvus", "sqlite_vec", "faiss"])
def vector_provider(request):
return request.param
@pytest.fixture(scope="session")
def mock_inference_api(embedding_dimension):
class MockInferenceAPI:
@@ -246,10 +247,10 @@ def chroma_vec_db_path(tmp_path_factory):
@pytest.fixture
async def chroma_vec_index(chroma_vec_db_path, embedding_dimension):
index = ChromaIndex(
embedding_dimension=embedding_dimension,
persist_directory=chroma_vec_db_path,
)
client = PersistentClient(path=chroma_vec_db_path)
name = f"{COLLECTION_PREFIX}_{np.random.randint(1e6)}"
collection = await maybe_await(client.get_or_create_collection(name))
index = ChromaIndex(client=client, collection=collection)
await index.initialize()
yield index
await index.delete()
@@ -257,7 +258,10 @@ async def chroma_vec_index(chroma_vec_db_path, embedding_dimension):
@pytest.fixture
async def chroma_vec_adapter(chroma_vec_db_path, mock_inference_api, embedding_dimension):
config = ChromaVectorIOConfig(persist_directory=chroma_vec_db_path)
config = ChromaVectorIOConfig(
db_path=chroma_vec_db_path,
kvstore=SqliteKVStoreConfig(),
)
adapter = ChromaVectorIOAdapter(
config=config,
inference_api=mock_inference_api,
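
The fixture now builds the Chroma collection itself and routes the call through maybe_await, since PersistentClient methods are synchronous while Chroma's HTTP client returns awaitables. A plausible implementation of such a helper (the real one lives in chroma.py and may differ):

import inspect

async def maybe_await(value):
    # Plausible helper, may differ from the real chroma.py: await only when
    # the underlying client handed back an awaitable.
    if inspect.isawaitable(value):
        return await value
    return value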

@@ -86,10 +86,14 @@ async def test_register_and_unregister_vector_db(vector_io_adapter):
assert dummy.identifier not in vector_io_adapter.cache
async def test_query_unregistered_raises(vector_io_adapter):
async def test_query_unregistered_raises(vector_io_adapter, vector_provider):
fake_emb = np.zeros(8, dtype=np.float32)
with pytest.raises(ValueError):
await vector_io_adapter.query_chunks("no_such_db", fake_emb)
if vector_provider == "chroma":
with pytest.raises(AttributeError):
await vector_io_adapter.query_chunks("no_such_db", fake_emb)
else:
with pytest.raises(ValueError):
await vector_io_adapter.query_chunks("no_such_db", fake_emb)
async def test_insert_chunks_calls_underlying_index(vector_io_adapter):

@@ -19,7 +19,8 @@ from llama_stack.distribution.datatypes import (
OAuth2JWKSConfig,
OAuth2TokenAuthConfig,
)
from llama_stack.distribution.server.auth import AuthenticationMiddleware
from llama_stack.distribution.request_headers import User
from llama_stack.distribution.server.auth import AuthenticationMiddleware, _has_required_scope
from llama_stack.distribution.server.auth_providers import (
get_attributes_from_claims,
)
@@ -73,7 +74,7 @@ def http_app(mock_auth_endpoint):
),
access_policy=[],
)
app.add_middleware(AuthenticationMiddleware, auth_config=auth_config)
app.add_middleware(AuthenticationMiddleware, auth_config=auth_config, impls={})
@app.get("/test")
def test_endpoint():
@@ -111,7 +112,50 @@ def mock_http_middleware(mock_auth_endpoint):
),
access_policy=[],
)
return AuthenticationMiddleware(mock_app, auth_config), mock_app
return AuthenticationMiddleware(mock_app, auth_config, {}), mock_app
@pytest.fixture
def mock_impls():
"""Mock implementations for scope testing"""
return {}
@pytest.fixture
def scope_middleware_with_mocks(mock_auth_endpoint):
"""Create AuthenticationMiddleware with mocked route implementations"""
mock_app = AsyncMock()
auth_config = AuthenticationConfig(
provider_config=CustomAuthConfig(
type=AuthProviderType.CUSTOM,
endpoint=mock_auth_endpoint,
),
access_policy=[],
)
middleware = AuthenticationMiddleware(mock_app, auth_config, {})
# Mock the route_impls to simulate finding routes with required scopes
from llama_stack.schema_utils import WebMethod
scoped_webmethod = WebMethod(route="/test/scoped", method="POST", required_scope="test.read")
public_webmethod = WebMethod(route="/test/public", method="GET")
# Mock the route finding logic
def mock_find_matching_route(method, path, route_impls):
if method == "POST" and path == "/test/scoped":
return None, {}, "/test/scoped", scoped_webmethod
elif method == "GET" and path == "/test/public":
return None, {}, "/test/public", public_webmethod
else:
raise ValueError("No matching route")
import llama_stack.distribution.server.auth
llama_stack.distribution.server.auth.find_matching_route = mock_find_matching_route
llama_stack.distribution.server.auth.initialize_route_impls = lambda impls: {}
return middleware, mock_app
async def mock_post_success(*args, **kwargs):
@@ -138,6 +182,36 @@ async def mock_post_exception(*args, **kwargs):
raise Exception("Connection error")
async def mock_post_success_with_scope(*args, **kwargs):
"""Mock auth response for user with test.read scope"""
return MockResponse(
200,
{
"message": "Authentication successful",
"principal": "test-user",
"attributes": {
"scopes": ["test.read", "other.scope"],
"roles": ["user"],
},
},
)
async def mock_post_success_no_scope(*args, **kwargs):
"""Mock auth response for user without required scope"""
return MockResponse(
200,
{
"message": "Authentication successful",
"principal": "test-user",
"attributes": {
"scopes": ["other.scope"],
"roles": ["user"],
},
},
)
# HTTP Endpoint Tests
def test_missing_auth_header(http_client):
response = http_client.get("/test")
@@ -252,7 +326,7 @@ def oauth2_app():
),
access_policy=[],
)
app.add_middleware(AuthenticationMiddleware, auth_config=auth_config)
app.add_middleware(AuthenticationMiddleware, auth_config=auth_config, impls={})
@app.get("/test")
def test_endpoint():
@@ -351,7 +425,7 @@ def oauth2_app_with_jwks_token():
),
access_policy=[],
)
app.add_middleware(AuthenticationMiddleware, auth_config=auth_config)
app.add_middleware(AuthenticationMiddleware, auth_config=auth_config, impls={})
@app.get("/test")
def test_endpoint():
@@ -442,7 +516,7 @@ def introspection_app(mock_introspection_endpoint):
),
access_policy=[],
)
app.add_middleware(AuthenticationMiddleware, auth_config=auth_config)
app.add_middleware(AuthenticationMiddleware, auth_config=auth_config, impls={})
@app.get("/test")
def test_endpoint():
@@ -472,7 +546,7 @@ def introspection_app_with_custom_mapping(mock_introspection_endpoint):
),
access_policy=[],
)
app.add_middleware(AuthenticationMiddleware, auth_config=auth_config)
app.add_middleware(AuthenticationMiddleware, auth_config=auth_config, impls={})
@app.get("/test")
def test_endpoint():
@@ -581,3 +655,122 @@ def test_valid_introspection_with_custom_mapping_authentication(
)
assert response.status_code == 200
assert response.json() == {"message": "Authentication successful"}
# Scope-based authorization tests
@patch("httpx.AsyncClient.post", new=mock_post_success_with_scope)
async def test_scope_authorization_success(scope_middleware_with_mocks, valid_api_key):
"""Test that user with required scope can access protected endpoint"""
middleware, mock_app = scope_middleware_with_mocks
mock_receive = AsyncMock()
mock_send = AsyncMock()
scope = {
"type": "http",
"path": "/test/scoped",
"method": "POST",
"headers": [(b"authorization", f"Bearer {valid_api_key}".encode())],
}
await middleware(scope, mock_receive, mock_send)
# Should call the downstream app (no 403 error sent)
mock_app.assert_called_once_with(scope, mock_receive, mock_send)
mock_send.assert_not_called()
@patch("httpx.AsyncClient.post", new=mock_post_success_no_scope)
async def test_scope_authorization_denied(scope_middleware_with_mocks, valid_api_key):
"""Test that user without required scope gets 403 access denied"""
middleware, mock_app = scope_middleware_with_mocks
mock_receive = AsyncMock()
mock_send = AsyncMock()
scope = {
"type": "http",
"path": "/test/scoped",
"method": "POST",
"headers": [(b"authorization", f"Bearer {valid_api_key}".encode())],
}
await middleware(scope, mock_receive, mock_send)
# Should send 403 error, not call downstream app
mock_app.assert_not_called()
assert mock_send.call_count == 2 # start + body
# Check the response
start_call = mock_send.call_args_list[0][0][0]
assert start_call["status"] == 403
body_call = mock_send.call_args_list[1][0][0]
body_text = body_call["body"].decode()
assert "Access denied" in body_text
assert "test.read" in body_text
@patch("httpx.AsyncClient.post", new=mock_post_success_no_scope)
async def test_public_endpoint_no_scope_required(scope_middleware_with_mocks, valid_api_key):
"""Test that public endpoints work without specific scopes"""
middleware, mock_app = scope_middleware_with_mocks
mock_receive = AsyncMock()
mock_send = AsyncMock()
scope = {
"type": "http",
"path": "/test/public",
"method": "GET",
"headers": [(b"authorization", f"Bearer {valid_api_key}".encode())],
}
await middleware(scope, mock_receive, mock_send)
# Should call the downstream app (no error)
mock_app.assert_called_once_with(scope, mock_receive, mock_send)
mock_send.assert_not_called()
async def test_scope_authorization_no_auth_disabled(scope_middleware_with_mocks):
"""Test that when auth is disabled (no user), scope checks are bypassed"""
middleware, mock_app = scope_middleware_with_mocks
mock_receive = AsyncMock()
mock_send = AsyncMock()
scope = {
"type": "http",
"path": "/test/scoped",
"method": "POST",
"headers": [], # No authorization header
}
await middleware(scope, mock_receive, mock_send)
# Should send 401 auth error, not call downstream app
mock_app.assert_not_called()
assert mock_send.call_count == 2 # start + body
# Check the response
start_call = mock_send.call_args_list[0][0][0]
assert start_call["status"] == 401
body_call = mock_send.call_args_list[1][0][0]
body_text = body_call["body"].decode()
assert "Authentication required" in body_text
def test_has_required_scope_function():
"""Test the _has_required_scope function directly"""
# Test user with required scope
user_with_scope = User(principal="test-user", attributes={"scopes": ["test.read", "other.scope"]})
assert _has_required_scope("test.read", user_with_scope)
# Test user without required scope
user_without_scope = User(principal="test-user", attributes={"scopes": ["other.scope"]})
assert not _has_required_scope("test.read", user_without_scope)
# Test user with no scopes attribute
user_no_scopes = User(principal="test-user", attributes={})
assert not _has_required_scope("test.read", user_no_scopes)
# Test no user (auth disabled)
assert _has_required_scope("test.read", None)
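
The last test pins down _has_required_scope's contract: a None user (auth disabled) passes, and otherwise the required scope must appear in user.attributes["scopes"]. A sketch consistent with those assertions, not necessarily the shipped implementation:

def _has_required_scope(required_scope: str, user) -> bool:
    # Sketch matching the tests above; not necessarily the shipped code.
    # No authenticated user means auth is disabled, so scope checks are bypassed.
    if user is None:
        return True
    scopes = (user.attributes or {}).get("scopes", [])
    return required_scope in scopes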