Merge branch 'main' into fix/issue-2584-llama4-tool-calling-v2

commit 09f42d9d91
Sumanth Kamenani, 2025-07-28 09:06:49 -04:00, committed by GitHub
183 changed files with 6226 additions and 5684 deletions

@@ -1,3 +0,0 @@
# Ollama external provider for Llama Stack
Template code to create a new external provider for Llama Stack.

@@ -1,7 +0,0 @@
adapter:
adapter_type: custom_ollama
pip_packages: ["ollama", "aiohttp", "tests/external-provider/llama-stack-provider-ollama"]
config_class: llama_stack_provider_ollama.config.OllamaImplConfig
module: llama_stack_provider_ollama
api_dependencies: []
optional_api_dependencies: []
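
The adapter spec above is what Llama Stack discovers under external_providers_dir to wire in an out-of-tree provider. A minimal sketch of how such a YAML could be parsed into an AdapterSpec (field names come from the spec above; this loader is illustrative, not the stack's actual discovery code):

import yaml

from llama_stack.providers.datatypes import AdapterSpec

def load_adapter_spec(path: str) -> AdapterSpec:
    # Illustrative loader; the real discovery logic lives inside Llama Stack.
    with open(path) as f:
        doc = yaml.safe_load(f)
    adapter = doc["adapter"]
    return AdapterSpec(
        adapter_type=adapter["adapter_type"],
        module=adapter["module"],
        pip_packages=adapter.get("pip_packages", []),
        config_class=adapter["config_class"],
    )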

@@ -1,43 +0,0 @@
[project]
dependencies = [
"llama-stack",
"pydantic",
"ollama",
"aiohttp",
"aiosqlite",
"autoevals",
"chardet",
"chromadb-client",
"datasets",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
]
name = "llama-stack-provider-ollama"
version = "0.1.0"
description = "External provider for Ollama using the Llama Stack API"
readme = "README.md"
requires-python = ">=3.12"

@@ -1,124 +0,0 @@
version: 2
image_name: ollama
apis:
- agents
- datasetio
- eval
- inference
- safety
- scoring
- telemetry
- tool_runtime
- vector_io
providers:
inference:
- provider_id: ollama
provider_type: remote::ollama
config:
url: ${env.OLLAMA_URL:=http://localhost:11434}
vector_io:
- provider_id: faiss
provider_type: inline::faiss
config:
metadata_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config: {}
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
agents_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db
responses_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/responses_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: "${env.OTEL_SERVICE_NAME:=\u200b}"
sinks: ${env.TELEMETRY_SINKS:=console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/trace_store.db
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
metadata_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/huggingface_datasetio.db
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config:
metadata_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
metadata_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/localfs_datasetio.db
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:+}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:+}
max_results: 3
- provider_id: rag-runtime
provider_type: inline::rag-runtime
config: {}
- provider_id: wolfram-alpha
provider_type: remote::wolfram-alpha
config:
api_key: ${env.WOLFRAM_ALPHA_API_KEY:+}
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/registry.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: custom_ollama
model_type: llm
- metadata:
embedding_dimension: 384
model_id: all-MiniLM-L6-v2
provider_id: custom_ollama
provider_model_id: all-minilm:l6-v2
model_type: embedding
shields: []
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
- toolgroup_id: builtin::wolfram_alpha
provider_id: wolfram-alpha
server:
port: 8321
external_providers_dir: ~/.llama/providers.d
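
Values such as ${env.OLLAMA_URL:=http://localhost:11434} in the run.yaml above use shell-style expansion: := substitutes the default when the variable is unset, while :+ substitutes its word only when the variable is set. A rough sketch of that substitution rule (the exact semantics are an assumption; the stack's real resolver may differ):

import os
import re

# Sketch of ${env.NAME:=default} / ${env.NAME:+word} expansion; semantics assumed.
_PLACEHOLDER = re.compile(r"\$\{env\.(?P<name>\w+):(?P<op>[=+])(?P<word>[^}]*)\}")

def resolve_env_placeholders(text: str) -> str:
    def repl(match: re.Match) -> str:
        value = os.environ.get(match.group("name"))
        if match.group("op") == "=":
            return value if value is not None else match.group("word")  # := default
        return match.group("word") if value is not None else ""  # :+ conditional
    return _PLACEHOLDER.sub(repl, text)

print(resolve_env_placeholders("url: ${env.OLLAMA_URL:=http://localhost:11434}"))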

@@ -2,8 +2,10 @@ version: '2'
distribution_spec:
description: Custom distro for CI tests
providers:
inference:
- remote::custom_ollama
image_type: container
weather:
- provider_id: kaze
provider_type: remote::kaze
image_type: venv
image_name: ci-test
external_providers_dir: ~/.llama/providers.d
external_apis_dir: ~/.llama/apis.d

tests/external/kaze.yaml (new file, vendored)

@@ -0,0 +1,6 @@
adapter:
adapter_type: kaze
pip_packages: ["tests/external/llama-stack-provider-kaze"]
config_class: llama_stack_provider_kaze.config.KazeProviderConfig
module: llama_stack_provider_kaze
optional_api_dependencies: []

@@ -0,0 +1,15 @@
[project]
name = "llama-stack-api-weather"
version = "0.1.0"
description = "Weather API for Llama Stack"
readme = "README.md"
requires-python = ">=3.10"
dependencies = ["llama-stack", "pydantic"]
[build-system]
requires = ["setuptools"]
build-backend = "setuptools.build_meta"
[tool.setuptools.packages.find]
where = ["src"]
include = ["llama_stack_api_weather", "llama_stack_api_weather.*"]

@@ -0,0 +1,11 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
"""Weather API for Llama Stack."""
from .weather import WeatherProvider, available_providers
__all__ = ["WeatherProvider", "available_providers"]

@@ -0,0 +1,39 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Protocol
from llama_stack.providers.datatypes import AdapterSpec, Api, ProviderSpec, RemoteProviderSpec
from llama_stack.schema_utils import webmethod
def available_providers() -> list[ProviderSpec]:
return [
RemoteProviderSpec(
api=Api.weather,
provider_type="remote::kaze",
config_class="llama_stack_provider_kaze.KazeProviderConfig",
adapter=AdapterSpec(
adapter_type="kaze",
module="llama_stack_provider_kaze",
pip_packages=["llama_stack_provider_kaze"],
config_class="llama_stack_provider_kaze.KazeProviderConfig",
),
),
]
class WeatherProvider(Protocol):
"""
A protocol for the Weather API.
"""
@webmethod(route="/weather/locations", method="GET")
async def get_available_locations() -> dict[str, list[str]]:
"""
Get the available locations.
"""
...
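
Because WeatherProvider is a typing.Protocol, conformance is structural: any object whose get_available_locations matches the signature can be passed where the protocol is expected. A small usage sketch (InMemoryWeather is a made-up stand-in, not part of this commit):

import asyncio
from typing import Protocol

class WeatherProvider(Protocol):
    async def get_available_locations(self) -> dict[str, list[str]]: ...

class InMemoryWeather:
    # Hypothetical provider used only for this example.
    async def get_available_locations(self) -> dict[str, list[str]]:
        return {"locations": ["Berlin"]}

async def main(provider: WeatherProvider) -> None:
    print(await provider.get_available_locations())

asyncio.run(main(InMemoryWeather()))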

@@ -0,0 +1,15 @@
[project]
name = "llama-stack-provider-kaze"
version = "0.1.0"
description = "Kaze weather provider for Llama Stack"
readme = "README.md"
requires-python = ">=3.10"
dependencies = ["llama-stack", "pydantic", "aiohttp"]
[build-system]
requires = ["setuptools"]
build-backend = "setuptools.build_meta"
[tool.setuptools.packages.find]
where = ["src"]
include = ["llama_stack_provider_kaze", "llama_stack_provider_kaze.*"]

@@ -0,0 +1,20 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
"""Kaze weather provider for Llama Stack."""
from .config import KazeProviderConfig
from .kaze import WeatherKazeAdapter
__all__ = ["KazeProviderConfig", "WeatherKazeAdapter"]
async def get_adapter_impl(config: KazeProviderConfig, _deps):
from .kaze import WeatherKazeAdapter
impl = WeatherKazeAdapter(config)
await impl.initialize()
return impl
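
get_adapter_impl is the entry point the stack imports from an external provider package: it receives the parsed config (plus resolved API dependencies) and must return an initialized implementation. Roughly how a host could invoke it, as a sketch rather than the stack's actual resolver:

import asyncio
import importlib

async def build_provider(module_name: str, config, deps):
    # Sketch of a host-side call: import the package, call its hook.
    module = importlib.import_module(module_name)
    return await module.get_adapter_impl(config, deps)

# e.g.: impl = asyncio.run(build_provider("llama_stack_provider_kaze", KazeProviderConfig(), {}))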

@@ -0,0 +1,11 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from pydantic import BaseModel
class KazeProviderConfig(BaseModel):
"""Configuration for the Kaze weather provider."""

@@ -0,0 +1,26 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack_api_weather.weather import WeatherProvider
from .config import KazeProviderConfig
class WeatherKazeAdapter(WeatherProvider):
"""Kaze weather provider implementation."""
def __init__(
self,
config: KazeProviderConfig,
) -> None:
self.config = config
async def initialize(self) -> None:
pass
async def get_available_locations(self) -> dict[str, list[str]]:
"""Get available weather locations."""
return {"locations": ["Paris", "Tokyo"]}

@@ -0,0 +1,14 @@
version: 2
distribution_spec:
description: Use (an external) Ramalama server for running LLM inference
container_image: null
providers:
inference:
- provider_id: ramalama
provider_type: remote::ramalama
module: ramalama_stack==0.3.0a0
image_type: venv
image_name: ramalama-stack-test
additional_pip_packages:
- aiosqlite
- sqlalchemy[asyncio]

tests/external/ramalama-stack/run.yaml (new file, vendored)

@@ -0,0 +1,12 @@
version: 2
image_name: ramalama
apis:
- inference
providers:
inference:
- provider_id: ramalama
provider_type: remote::ramalama
module: ramalama_stack==0.3.0a0
config: {}
server:
port: 8321

tests/external/run-byoa.yaml (new file, vendored)

@@ -0,0 +1,13 @@
version: "2"
image_name: "llama-stack-api-weather"
apis:
- weather
providers:
weather:
- provider_id: kaze
provider_type: remote::kaze
config: {}
external_apis_dir: ~/.llama/apis.d
external_providers_dir: ~/.llama/providers.d
server:
port: 8321

tests/external/weather.yaml (new file, vendored)

@@ -0,0 +1,4 @@
module: llama_stack_api_weather
name: weather
pip_packages: ["tests/external/llama-stack-api-weather"]
protocol: WeatherProvider
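
This four-line spec is what external_apis_dir points at: it names the module defining the WeatherProvider protocol and the pip requirement needed to import it, while kaze.yaml under external_providers_dir supplies the implementation. A small sanity-check sketch of the layout run-byoa.yaml expects (assuming the test setup copies the specs into these directories):

from pathlib import Path

# Hypothetical layout check for the BYO-API test setup described above.
for spec in ("~/.llama/apis.d/weather.yaml", "~/.llama/providers.d/kaze.yaml"):
    path = Path(spec).expanduser()
    print(path, "present" if path.exists() else "missing")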

@@ -179,9 +179,7 @@ def test_openai_completion_prompt_logprobs(llama_stack_client, client_with_model
model=text_model_id,
prompt=prompt,
stream=False,
extra_body={
"prompt_logprobs": prompt_logprobs,
},
prompt_logprobs=prompt_logprobs,
)
assert len(response.choices) > 0
choice = response.choices[0]
@@ -196,9 +194,7 @@ def test_openai_completion_guided_choice(llama_stack_client, client_with_models,
model=text_model_id,
prompt=prompt,
stream=False,
extra_body={
"guided_choice": ["joy", "sadness"],
},
guided_choice=["joy", "sadness"],
)
assert len(response.choices) > 0
choice = response.choices[0]
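
The change above drops the extra_body wrapper in favor of passing vendor-specific parameters as plain keyword arguments, which the llama-stack client accepts directly. With the stock openai client, unknown kwargs are rejected, so the same fields would still travel in extra_body. A hedged side-by-side (names mirror the test; client is any OpenAI-compatible client):

def call_both(client, text_model_id: str, prompt: str):
    # llama-stack client style (per the updated test): flat kwargs (illustrative)
    flat = client.completions.create(
        model=text_model_id, prompt=prompt, stream=False,
        guided_choice=["joy", "sadness"],
    )
    # stock openai client style: vendor params ride in extra_body
    wrapped = client.completions.create(
        model=text_model_id, prompt=prompt, stream=False,
        extra_body={"guided_choice": ["joy", "sadness"]},
    )
    return flat, wrapped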

@@ -20,22 +20,15 @@ logger = logging.getLogger(__name__)
def skip_if_provider_doesnt_support_openai_vector_stores(client_with_models):
vector_io_providers = [p for p in client_with_models.providers.list() if p.api == "vector_io"]
for p in vector_io_providers:
if p.provider_type in ["inline::faiss", "inline::sqlite-vec", "inline::milvus", "inline::chromadb"]:
return
pytest.skip("OpenAI vector stores are not supported by any provider")
def skip_if_provider_doesnt_support_openai_vector_store_files_api(client_with_models):
vector_io_providers = [p for p in client_with_models.providers.list() if p.api == "vector_io"]
for p in vector_io_providers:
if p.provider_type in [
"inline::faiss",
"inline::sqlite-vec",
"inline::milvus",
"inline::chromadb",
"remote::pgvector",
"remote::chromadb",
]:
return
@@ -457,7 +450,6 @@ def test_openai_vector_store_search_with_max_num_results(
def test_openai_vector_store_attach_file(compat_client_with_empty_stores, client_with_models):
"""Test OpenAI vector store attach file."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
skip_if_provider_doesnt_support_openai_vector_store_files_api(client_with_models)
if isinstance(compat_client_with_empty_stores, LlamaStackClient):
pytest.skip("Vector Store Files attach is not yet supported with LlamaStackClient")
@@ -509,7 +501,6 @@ def test_openai_vector_store_attach_files_on_creation(compat_client_with_empty_s
def test_openai_vector_store_attach_files_on_creation(compat_client_with_empty_stores, client_with_models):
"""Test OpenAI vector store attach files on creation."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
skip_if_provider_doesnt_support_openai_vector_store_files_api(client_with_models)
if isinstance(compat_client_with_empty_stores, LlamaStackClient):
pytest.skip("Vector Store Files attach is not yet supported with LlamaStackClient")
@@ -566,7 +557,6 @@ def test_openai_vector_store_list_files(compat_client_with_empty_stores, client_with_models):
def test_openai_vector_store_list_files(compat_client_with_empty_stores, client_with_models):
"""Test OpenAI vector store list files."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
skip_if_provider_doesnt_support_openai_vector_store_files_api(client_with_models)
if isinstance(compat_client_with_empty_stores, LlamaStackClient):
pytest.skip("Vector Store Files list is not yet supported with LlamaStackClient")
@@ -640,7 +630,6 @@ def test_openai_vector_store_retrieve_file_contents(compat_client_with_empty_sto
def test_openai_vector_store_retrieve_file_contents(compat_client_with_empty_stores, client_with_models):
"""Test OpenAI vector store retrieve file contents."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
skip_if_provider_doesnt_support_openai_vector_store_files_api(client_with_models)
if isinstance(compat_client_with_empty_stores, LlamaStackClient):
pytest.skip("Vector Store Files retrieve contents is not yet supported with LlamaStackClient")
@@ -682,7 +671,6 @@ def test_openai_vector_store_delete_file(compat_client_with_empty_stores, client
def test_openai_vector_store_delete_file(compat_client_with_empty_stores, client_with_models):
"""Test OpenAI vector store delete file."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
skip_if_provider_doesnt_support_openai_vector_store_files_api(client_with_models)
if isinstance(compat_client_with_empty_stores, LlamaStackClient):
pytest.skip("Vector Store Files list is not yet supported with LlamaStackClient")
@@ -735,12 +723,9 @@ def test_openai_vector_store_delete_file(compat_client_with_empty_stores, client
assert updated_vector_store.file_counts.in_progress == 0
# TODO: Remove this xfail once we have a way to remove embeddings from vector store
@pytest.mark.xfail(reason="Vector Store Files delete doesn't remove embeddings from vector store", strict=True)
def test_openai_vector_store_delete_file_removes_from_vector_store(compat_client_with_empty_stores, client_with_models):
"""Test OpenAI vector store delete file removes from vector store."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
skip_if_provider_doesnt_support_openai_vector_store_files_api(client_with_models)
if isinstance(compat_client_with_empty_stores, LlamaStackClient):
pytest.skip("Vector Store Files attach is not yet supported with LlamaStackClient")
@@ -782,7 +767,6 @@ def test_openai_vector_store_delete_file_removes_from_vector_store(compat_client
def test_openai_vector_store_update_file(compat_client_with_empty_stores, client_with_models):
"""Test OpenAI vector store update file."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
skip_if_provider_doesnt_support_openai_vector_store_files_api(client_with_models)
if isinstance(compat_client_with_empty_stores, LlamaStackClient):
pytest.skip("Vector Store Files update is not yet supported with LlamaStackClient")
@@ -831,7 +815,6 @@ def test_create_vector_store_files_duplicate_vector_store_name(compat_client_wit
This test confirms that client.vector_stores.create() creates a unique ID
"""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
skip_if_provider_doesnt_support_openai_vector_store_files_api(client_with_models)
if isinstance(compat_client_with_empty_stores, LlamaStackClient):
pytest.skip("Vector Store Files create is not yet supported with LlamaStackClient")

@@ -15,6 +15,7 @@ from llama_stack.apis.models import Model, ModelType
from llama_stack.apis.shields.shields import Shield
from llama_stack.apis.tools import ListToolDefsResponse, ToolDef, ToolGroup, ToolParameter
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.distribution.datatypes import RegistryEntrySource
from llama_stack.distribution.routing_tables.benchmarks import BenchmarksRoutingTable
from llama_stack.distribution.routing_tables.datasets import DatasetsRoutingTable
from llama_stack.distribution.routing_tables.models import ModelsRoutingTable
@@ -45,6 +46,30 @@ class InferenceImpl(Impl):
async def unregister_model(self, model_id: str):
return model_id
async def should_refresh_models(self):
return False
async def list_models(self):
return [
Model(
identifier="provider-model-1",
provider_resource_id="provider-model-1",
provider_id="test_provider",
metadata={},
model_type=ModelType.llm,
),
Model(
identifier="provider-model-2",
provider_resource_id="provider-model-2",
provider_id="test_provider",
metadata={"embedding_dimension": 512},
model_type=ModelType.embedding,
),
]
async def shutdown(self):
pass
class SafetyImpl(Impl):
def __init__(self):
@@ -378,3 +403,170 @@ async def test_models_fallback_lookup_behavior(cached_disk_dist_registry):
raise AssertionError("Should have raised ValueError for non-existent model")
except ValueError as e:
assert "not found" in str(e)
async def test_models_source_tracking_default(cached_disk_dist_registry):
"""Test that models registered via register_model get default source."""
table = ModelsRoutingTable({"test_provider": InferenceImpl()}, cached_disk_dist_registry, {})
await table.initialize()
# Register model via register_model (should get default source)
await table.register_model(model_id="user-model", provider_id="test_provider")
models = await table.list_models()
assert len(models.data) == 1
model = models.data[0]
assert model.source == RegistryEntrySource.via_register_api
assert model.identifier == "test_provider/user-model"
# Cleanup
await table.shutdown()
async def test_models_source_tracking_provider(cached_disk_dist_registry):
"""Test that models registered via update_registered_models get provider source."""
table = ModelsRoutingTable({"test_provider": InferenceImpl()}, cached_disk_dist_registry, {})
await table.initialize()
# Simulate provider refresh by calling update_registered_models
provider_models = [
Model(
identifier="provider-model-1",
provider_resource_id="provider-model-1",
provider_id="test_provider",
metadata={},
model_type=ModelType.llm,
),
Model(
identifier="provider-model-2",
provider_resource_id="provider-model-2",
provider_id="test_provider",
metadata={"embedding_dimension": 512},
model_type=ModelType.embedding,
),
]
await table.update_registered_models("test_provider", provider_models)
models = await table.list_models()
assert len(models.data) == 2
# All models should have provider source
for model in models.data:
assert model.source == RegistryEntrySource.listed_from_provider
assert model.provider_id == "test_provider"
# Cleanup
await table.shutdown()
async def test_models_source_interaction_preserves_default(cached_disk_dist_registry):
"""Test that provider refresh preserves user-registered models with default source."""
table = ModelsRoutingTable({"test_provider": InferenceImpl()}, cached_disk_dist_registry, {})
await table.initialize()
# First register a user model with same provider_resource_id as provider will later provide
await table.register_model(
model_id="my-custom-alias", provider_model_id="provider-model-1", provider_id="test_provider"
)
# Verify user model is registered with default source
models = await table.list_models()
assert len(models.data) == 1
user_model = models.data[0]
assert user_model.source == RegistryEntrySource.via_register_api
assert user_model.identifier == "my-custom-alias"
assert user_model.provider_resource_id == "provider-model-1"
# Now simulate provider refresh
provider_models = [
Model(
identifier="provider-model-1",
provider_resource_id="provider-model-1",
provider_id="test_provider",
metadata={},
model_type=ModelType.llm,
),
Model(
identifier="different-model",
provider_resource_id="different-model",
provider_id="test_provider",
metadata={},
model_type=ModelType.llm,
),
]
await table.update_registered_models("test_provider", provider_models)
# Verify user model with alias is preserved, but provider added new model
models = await table.list_models()
assert len(models.data) == 2
# Find the user model and provider model
user_model = next((m for m in models.data if m.identifier == "my-custom-alias"), None)
provider_model = next((m for m in models.data if m.identifier == "test_provider/different-model"), None)
assert user_model is not None
assert user_model.source == RegistryEntrySource.via_register_api
assert user_model.provider_resource_id == "provider-model-1"
assert provider_model is not None
assert provider_model.source == RegistryEntrySource.listed_from_provider
assert provider_model.provider_resource_id == "different-model"
# Cleanup
await table.shutdown()
async def test_models_source_interaction_cleanup_provider_models(cached_disk_dist_registry):
"""Test that provider refresh removes old provider models but keeps default ones."""
table = ModelsRoutingTable({"test_provider": InferenceImpl()}, cached_disk_dist_registry, {})
await table.initialize()
# Register a user model
await table.register_model(model_id="user-model", provider_id="test_provider")
# Add some provider models
provider_models_v1 = [
Model(
identifier="provider-model-old",
provider_resource_id="provider-model-old",
provider_id="test_provider",
metadata={},
model_type=ModelType.llm,
),
]
await table.update_registered_models("test_provider", provider_models_v1)
# Verify we have both user and provider models
models = await table.list_models()
assert len(models.data) == 2
# Now update with new provider models (should remove old provider models)
provider_models_v2 = [
Model(
identifier="provider-model-new",
provider_resource_id="provider-model-new",
provider_id="test_provider",
metadata={},
model_type=ModelType.llm,
),
]
await table.update_registered_models("test_provider", provider_models_v2)
# Should have user model + new provider model, old provider model gone
models = await table.list_models()
assert len(models.data) == 2
identifiers = {m.identifier for m in models.data}
assert "test_provider/user-model" in identifiers # User model preserved
assert "test_provider/provider-model-new" in identifiers # New provider model (uses provider's identifier)
assert "test_provider/provider-model-old" not in identifiers # Old provider model removed
# Verify sources are correct
user_model = next((m for m in models.data if m.identifier == "test_provider/user-model"), None)
provider_model = next((m for m in models.data if m.identifier == "test_provider/provider-model-new"), None)
assert user_model.source == RegistryEntrySource.via_register_api
assert provider_model.source == RegistryEntrySource.listed_from_provider
# Cleanup
await table.shutdown()
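
Taken together, these tests pin down a merge rule: entries created by register_model carry source via_register_api and survive refreshes, while update_registered_models replaces the provider-sourced set wholesale. A toy model of that invariant (not the real routing table, just the behavior the tests assert):

def refresh(existing: dict[str, str], provider_model_ids: list[str]) -> dict[str, str]:
    # Toy model of the invariant, not the real implementation: existing maps
    # identifier -> source; a refresh keeps user-registered rows and swaps in
    # the provider's current listing.
    kept = {k: v for k, v in existing.items() if v == "via_register_api"}
    kept.update({f"test_provider/{m}": "listed_from_provider" for m in provider_model_ids})
    return kept

registry = {
    "test_provider/user-model": "via_register_api",
    "test_provider/provider-model-old": "listed_from_provider",
}
print(refresh(registry, ["provider-model-new"]))
# user-model kept; provider-model-old dropped; provider-model-new added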

@@ -106,6 +106,40 @@ def api_directories(tmp_path):
return remote_inference_dir, inline_inference_dir
def make_import_module_side_effect(
builtin_provider_spec=None,
external_module=None,
raise_for_external=False,
missing_get_provider_spec=False,
):
from types import SimpleNamespace
def import_module_side_effect(name):
if name == "llama_stack.providers.registry.inference":
mock_builtin = SimpleNamespace(
available_providers=lambda: [
builtin_provider_spec
or ProviderSpec(
api=Api.inference,
provider_type="test_provider",
config_class="test_provider.config.TestProviderConfig",
module="test_provider",
)
]
)
return mock_builtin
elif name == "external_test.provider":
if raise_for_external:
raise ModuleNotFoundError(name)
if missing_get_provider_spec:
return SimpleNamespace()
return external_module
else:
raise ModuleNotFoundError(name)
return import_module_side_effect
class TestProviderRegistry:
"""Test suite for provider registry functionality."""
@@ -221,3 +255,124 @@ pip_packages:
with pytest.raises(KeyError) as exc_info:
get_provider_registry(base_config)
assert "config_class" in str(exc_info.value)
def test_external_provider_from_module_success(self, mock_providers):
"""Test loading an external provider from a module (success path)."""
from types import SimpleNamespace
from llama_stack.distribution.datatypes import Provider, StackRunConfig
from llama_stack.providers.datatypes import Api, ProviderSpec
# Simulate a provider module with get_provider_spec
fake_spec = ProviderSpec(
api=Api.inference,
provider_type="external_test",
config_class="external_test.config.ExternalTestConfig",
module="external_test",
)
fake_module = SimpleNamespace(get_provider_spec=lambda: fake_spec)
import_module_side_effect = make_import_module_side_effect(external_module=fake_module)
with patch("importlib.import_module", side_effect=import_module_side_effect) as mock_import:
config = StackRunConfig(
image_name="test_image",
providers={
"inference": [
Provider(
provider_id="external_test",
provider_type="external_test",
config={},
module="external_test",
)
]
},
)
registry = get_provider_registry(config)
assert Api.inference in registry
assert "external_test" in registry[Api.inference]
provider = registry[Api.inference]["external_test"]
assert provider.module == "external_test"
assert provider.config_class == "external_test.config.ExternalTestConfig"
mock_import.assert_any_call("llama_stack.providers.registry.inference")
mock_import.assert_any_call("external_test.provider")
def test_external_provider_from_module_not_found(self, mock_providers):
"""Test handling ModuleNotFoundError for missing provider module."""
from llama_stack.distribution.datatypes import Provider, StackRunConfig
import_module_side_effect = make_import_module_side_effect(raise_for_external=True)
with patch("importlib.import_module", side_effect=import_module_side_effect):
config = StackRunConfig(
image_name="test_image",
providers={
"inference": [
Provider(
provider_id="external_test",
provider_type="external_test",
config={},
module="external_test",
)
]
},
)
with pytest.raises(ValueError) as exc_info:
get_provider_registry(config)
assert "get_provider_spec not found" in str(exc_info.value)
def test_external_provider_from_module_missing_get_provider_spec(self, mock_providers):
"""Test handling missing get_provider_spec in provider module (should raise ValueError)."""
from llama_stack.distribution.datatypes import Provider, StackRunConfig
import_module_side_effect = make_import_module_side_effect(missing_get_provider_spec=True)
with patch("importlib.import_module", side_effect=import_module_side_effect):
config = StackRunConfig(
image_name="test_image",
providers={
"inference": [
Provider(
provider_id="external_test",
provider_type="external_test",
config={},
module="external_test",
)
]
},
)
with pytest.raises(AttributeError):
get_provider_registry(config)
def test_external_provider_from_module_building(self, mock_providers):
"""Test loading an external provider from a module during build (building=True, partial spec)."""
from llama_stack.distribution.datatypes import BuildConfig, DistributionSpec, Provider
from llama_stack.providers.datatypes import Api
# No importlib patch needed, should not import module when type of `config` is BuildConfig or DistributionSpec
build_config = BuildConfig(
version=2,
image_type="container",
image_name="test_image",
distribution_spec=DistributionSpec(
description="test",
providers={
"inference": [
Provider(
provider_id="external_test",
provider_type="external_test",
config={},
module="external_test",
)
]
},
),
)
registry = get_provider_registry(build_config)
assert Api.inference in registry
assert "external_test" in registry[Api.inference]
provider = registry[Api.inference]["external_test"]
assert provider.module == "external_test"
assert provider.is_external is True
# config_class is empty string in partial spec
assert provider.config_class == ""
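
These tests fix the contract for module-based external providers: the stack imports <module>.provider and calls its get_provider_spec(). A minimal sketch of what such a module would export (field values copied from the fake spec in the success-path test):

# external_test/provider.py -- illustrative external provider module
from llama_stack.providers.datatypes import Api, ProviderSpec

def get_provider_spec() -> ProviderSpec:
    return ProviderSpec(
        api=Api.inference,
        provider_type="external_test",
        config_class="external_test.config.ExternalTestConfig",
        module="external_test",
    )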

@@ -5,321 +5,353 @@
# the root directory of this source tree.
import os
import unittest
from typing import Any
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from llama_stack.apis.inference import CompletionMessage, UserMessage
from llama_stack.apis.resource import ResourceType
from llama_stack.apis.safety import RunShieldResponse, ViolationLevel
from llama_stack.apis.shields import Shield
from llama_stack.models.llama.datatypes import StopReason
from llama_stack.providers.remote.safety.nvidia.config import NVIDIASafetyConfig
from llama_stack.providers.remote.safety.nvidia.nvidia import NVIDIASafetyAdapter
class TestNVIDIASafetyAdapter(unittest.TestCase):
def setUp(self):
os.environ["NVIDIA_GUARDRAILS_URL"] = "http://nemo.test"
class TestNVIDIASafetyAdapter(NVIDIASafetyAdapter):
"""Test implementation that provides the required shield_store."""
# Initialize the adapter
self.config = NVIDIASafetyConfig(
guardrails_service_url=os.environ["NVIDIA_GUARDRAILS_URL"],
)
self.adapter = NVIDIASafetyAdapter(config=self.config)
self.shield_store = AsyncMock()
self.adapter.shield_store = self.shield_store
def __init__(self, config: NVIDIASafetyConfig, shield_store):
super().__init__(config)
self.shield_store = shield_store
# Mock the HTTP request methods
self.guardrails_post_patcher = patch(
"llama_stack.providers.remote.safety.nvidia.nvidia.NeMoGuardrails._guardrails_post"
)
self.mock_guardrails_post = self.guardrails_post_patcher.start()
self.mock_guardrails_post.return_value = {"status": "allowed"}
def tearDown(self):
"""Clean up after each test."""
self.guardrails_post_patcher.stop()
@pytest.fixture
def nvidia_adapter():
"""Set up the NVIDIASafetyAdapter for testing."""
os.environ["NVIDIA_GUARDRAILS_URL"] = "http://nemo.test"
@pytest.fixture(autouse=True)
def inject_fixtures(self, run_async):
self.run_async = run_async
# Initialize the adapter
config = NVIDIASafetyConfig(
guardrails_service_url=os.environ["NVIDIA_GUARDRAILS_URL"],
)
def _assert_request(
self,
mock_call: MagicMock,
expected_url: str,
expected_headers: dict[str, str] | None = None,
expected_json: dict[str, Any] | None = None,
) -> None:
"""
Helper method to verify request details in mock API calls.
# Create a mock shield store that implements the ShieldStore protocol
shield_store = AsyncMock()
shield_store.get_shield = AsyncMock()
Args:
mock_call: The MagicMock object that was called
expected_url: The expected URL to which the request was made
expected_headers: Optional dictionary of expected request headers
expected_json: Optional dictionary of expected JSON payload
"""
call_args = mock_call.call_args
adapter = TestNVIDIASafetyAdapter(config=config, shield_store=shield_store)
# Check URL
assert call_args[0][0] == expected_url
return adapter
# Check headers if provided
if expected_headers:
for key, value in expected_headers.items():
assert call_args[1]["headers"][key] == value
# Check JSON if provided
if expected_json:
for key, value in expected_json.items():
if isinstance(value, dict):
for nested_key, nested_value in value.items():
assert call_args[1]["json"][key][nested_key] == nested_value
else:
assert call_args[1]["json"][key] == value
@pytest.fixture
def mock_guardrails_post():
"""Mock the HTTP request methods."""
with patch("llama_stack.providers.remote.safety.nvidia.nvidia.NeMoGuardrails._guardrails_post") as mock_post:
mock_post.return_value = {"status": "allowed"}
yield mock_post
def test_register_shield_with_valid_id(self):
shield = Shield(
provider_id="nvidia",
type="shield",
identifier="test-shield",
provider_resource_id="test-model",
)
# Register the shield
self.run_async(self.adapter.register_shield(shield))
def _assert_request(
mock_call: MagicMock,
expected_url: str,
expected_headers: dict[str, str] | None = None,
expected_json: dict[str, Any] | None = None,
) -> None:
"""
Helper method to verify request details in mock API calls.
def test_register_shield_without_id(self):
shield = Shield(
provider_id="nvidia",
type="shield",
identifier="test-shield",
provider_resource_id="",
)
Args:
mock_call: The MagicMock object that was called
expected_url: The expected URL to which the request was made
expected_headers: Optional dictionary of expected request headers
expected_json: Optional dictionary of expected JSON payload
"""
call_args = mock_call.call_args
# Register the shield should raise a ValueError
with self.assertRaises(ValueError):
self.run_async(self.adapter.register_shield(shield))
# Check URL
assert call_args[0][0] == expected_url
def test_run_shield_allowed(self):
# Set up the shield
shield_id = "test-shield"
shield = Shield(
provider_id="nvidia",
type="shield",
identifier=shield_id,
provider_resource_id="test-model",
)
self.shield_store.get_shield.return_value = shield
# Check headers if provided
if expected_headers:
for key, value in expected_headers.items():
assert call_args[1]["headers"][key] == value
# Mock Guardrails API response
self.mock_guardrails_post.return_value = {"status": "allowed"}
# Check JSON if provided
if expected_json:
for key, value in expected_json.items():
if isinstance(value, dict):
for nested_key, nested_value in value.items():
assert call_args[1]["json"][key][nested_key] == nested_value
else:
assert call_args[1]["json"][key] == value
# Run the shield
messages = [
UserMessage(role="user", content="Hello, how are you?"),
CompletionMessage(
role="assistant",
content="I'm doing well, thank you for asking!",
stop_reason="end_of_message",
tool_calls=[],
),
]
result = self.run_async(self.adapter.run_shield(shield_id, messages))
# Verify the shield store was called
self.shield_store.get_shield.assert_called_once_with(shield_id)
async def test_register_shield_with_valid_id(nvidia_adapter):
adapter = nvidia_adapter
# Verify the Guardrails API was called correctly
self.mock_guardrails_post.assert_called_once_with(
path="/v1/guardrail/checks",
data={
"model": shield_id,
"messages": [
{"role": "user", "content": "Hello, how are you?"},
{"role": "assistant", "content": "I'm doing well, thank you for asking!"},
],
"temperature": 1.0,
"top_p": 1,
"frequency_penalty": 0,
"presence_penalty": 0,
"max_tokens": 160,
"stream": False,
"guardrails": {
"config_id": "self-check",
},
shield = Shield(
provider_id="nvidia",
type=ResourceType.shield,
identifier="test-shield",
provider_resource_id="test-model",
)
# Register the shield
await adapter.register_shield(shield)
async def test_register_shield_without_id(nvidia_adapter):
adapter = nvidia_adapter
shield = Shield(
provider_id="nvidia",
type=ResourceType.shield,
identifier="test-shield",
provider_resource_id="",
)
# Register the shield should raise a ValueError
with pytest.raises(ValueError):
await adapter.register_shield(shield)
async def test_run_shield_allowed(nvidia_adapter, mock_guardrails_post):
adapter = nvidia_adapter
# Set up the shield
shield_id = "test-shield"
shield = Shield(
provider_id="nvidia",
type=ResourceType.shield,
identifier=shield_id,
provider_resource_id="test-model",
)
adapter.shield_store.get_shield.return_value = shield
# Mock Guardrails API response
mock_guardrails_post.return_value = {"status": "allowed"}
# Run the shield
messages = [
UserMessage(role="user", content="Hello, how are you?"),
CompletionMessage(
role="assistant",
content="I'm doing well, thank you for asking!",
stop_reason=StopReason.end_of_message,
tool_calls=[],
),
]
result = await adapter.run_shield(shield_id, messages)
# Verify the shield store was called
adapter.shield_store.get_shield.assert_called_once_with(shield_id)
# Verify the Guardrails API was called correctly
mock_guardrails_post.assert_called_once_with(
path="/v1/guardrail/checks",
data={
"model": shield_id,
"messages": [
{"role": "user", "content": "Hello, how are you?"},
{"role": "assistant", "content": "I'm doing well, thank you for asking!"},
],
"temperature": 1.0,
"top_p": 1,
"frequency_penalty": 0,
"presence_penalty": 0,
"max_tokens": 160,
"stream": False,
"guardrails": {
"config_id": "self-check",
},
)
},
)
# Verify the result
assert isinstance(result, RunShieldResponse)
assert result.violation is None
# Verify the result
assert isinstance(result, RunShieldResponse)
assert result.violation is None
def test_run_shield_blocked(self):
# Set up the shield
shield_id = "test-shield"
shield = Shield(
provider_id="nvidia",
type="shield",
identifier=shield_id,
provider_resource_id="test-model",
)
self.shield_store.get_shield.return_value = shield
# Mock Guardrails API response
self.mock_guardrails_post.return_value = {"status": "blocked", "rails_status": {"reason": "harmful_content"}}
async def test_run_shield_blocked(nvidia_adapter, mock_guardrails_post):
adapter = nvidia_adapter
# Run the shield
messages = [
UserMessage(role="user", content="Hello, how are you?"),
CompletionMessage(
role="assistant",
content="I'm doing well, thank you for asking!",
stop_reason="end_of_message",
tool_calls=[],
),
]
result = self.run_async(self.adapter.run_shield(shield_id, messages))
# Set up the shield
shield_id = "test-shield"
shield = Shield(
provider_id="nvidia",
type=ResourceType.shield,
identifier=shield_id,
provider_resource_id="test-model",
)
adapter.shield_store.get_shield.return_value = shield
# Verify the shield store was called
self.shield_store.get_shield.assert_called_once_with(shield_id)
# Mock Guardrails API response
mock_guardrails_post.return_value = {"status": "blocked", "rails_status": {"reason": "harmful_content"}}
# Verify the Guardrails API was called correctly
self.mock_guardrails_post.assert_called_once_with(
path="/v1/guardrail/checks",
data={
"model": shield_id,
"messages": [
{"role": "user", "content": "Hello, how are you?"},
{"role": "assistant", "content": "I'm doing well, thank you for asking!"},
],
"temperature": 1.0,
"top_p": 1,
"frequency_penalty": 0,
"presence_penalty": 0,
"max_tokens": 160,
"stream": False,
"guardrails": {
"config_id": "self-check",
},
# Run the shield
messages = [
UserMessage(role="user", content="Hello, how are you?"),
CompletionMessage(
role="assistant",
content="I'm doing well, thank you for asking!",
stop_reason=StopReason.end_of_message,
tool_calls=[],
),
]
result = await adapter.run_shield(shield_id, messages)
# Verify the shield store was called
adapter.shield_store.get_shield.assert_called_once_with(shield_id)
# Verify the Guardrails API was called correctly
mock_guardrails_post.assert_called_once_with(
path="/v1/guardrail/checks",
data={
"model": shield_id,
"messages": [
{"role": "user", "content": "Hello, how are you?"},
{"role": "assistant", "content": "I'm doing well, thank you for asking!"},
],
"temperature": 1.0,
"top_p": 1,
"frequency_penalty": 0,
"presence_penalty": 0,
"max_tokens": 160,
"stream": False,
"guardrails": {
"config_id": "self-check",
},
)
},
)
# Verify the result
assert result.violation is not None
assert isinstance(result, RunShieldResponse)
assert result.violation.user_message == "Sorry I cannot do this."
assert result.violation.violation_level == ViolationLevel.ERROR
assert result.violation.metadata == {"reason": "harmful_content"}
# Verify the result
assert result.violation is not None
assert isinstance(result, RunShieldResponse)
assert result.violation.user_message == "Sorry I cannot do this."
assert result.violation.violation_level == ViolationLevel.ERROR
assert result.violation.metadata == {"reason": "harmful_content"}
def test_run_shield_not_found(self):
# Set up shield store to return None
shield_id = "non-existent-shield"
self.shield_store.get_shield.return_value = None
messages = [
UserMessage(role="user", content="Hello, how are you?"),
]
async def test_run_shield_not_found(nvidia_adapter, mock_guardrails_post):
adapter = nvidia_adapter
with self.assertRaises(ValueError):
self.run_async(self.adapter.run_shield(shield_id, messages))
# Set up shield store to return None
shield_id = "non-existent-shield"
adapter.shield_store.get_shield.return_value = None
# Verify the shield store was called
self.shield_store.get_shield.assert_called_once_with(shield_id)
messages = [
UserMessage(role="user", content="Hello, how are you?"),
]
# Verify the Guardrails API was not called
self.mock_guardrails_post.assert_not_called()
with pytest.raises(ValueError):
await adapter.run_shield(shield_id, messages)
def test_run_shield_http_error(self):
shield_id = "test-shield"
shield = Shield(
provider_id="nvidia",
type="shield",
identifier=shield_id,
provider_resource_id="test-model",
)
self.shield_store.get_shield.return_value = shield
# Verify the shield store was called
adapter.shield_store.get_shield.assert_called_once_with(shield_id)
# Mock Guardrails API to raise an exception
error_msg = "API Error: 500 Internal Server Error"
self.mock_guardrails_post.side_effect = Exception(error_msg)
# Verify the Guardrails API was not called
mock_guardrails_post.assert_not_called()
# Running the shield should raise an exception
messages = [
UserMessage(role="user", content="Hello, how are you?"),
CompletionMessage(
role="assistant",
content="I'm doing well, thank you for asking!",
stop_reason="end_of_message",
tool_calls=[],
),
]
with self.assertRaises(Exception) as context:
self.run_async(self.adapter.run_shield(shield_id, messages))
# Verify the shield store was called
self.shield_store.get_shield.assert_called_once_with(shield_id)
async def test_run_shield_http_error(nvidia_adapter, mock_guardrails_post):
adapter = nvidia_adapter
# Verify the Guardrails API was called correctly
self.mock_guardrails_post.assert_called_once_with(
path="/v1/guardrail/checks",
data={
"model": shield_id,
"messages": [
{"role": "user", "content": "Hello, how are you?"},
{"role": "assistant", "content": "I'm doing well, thank you for asking!"},
],
"temperature": 1.0,
"top_p": 1,
"frequency_penalty": 0,
"presence_penalty": 0,
"max_tokens": 160,
"stream": False,
"guardrails": {
"config_id": "self-check",
},
shield_id = "test-shield"
shield = Shield(
provider_id="nvidia",
type=ResourceType.shield,
identifier=shield_id,
provider_resource_id="test-model",
)
adapter.shield_store.get_shield.return_value = shield
# Mock Guardrails API to raise an exception
error_msg = "API Error: 500 Internal Server Error"
mock_guardrails_post.side_effect = Exception(error_msg)
# Running the shield should raise an exception
messages = [
UserMessage(role="user", content="Hello, how are you?"),
CompletionMessage(
role="assistant",
content="I'm doing well, thank you for asking!",
stop_reason=StopReason.end_of_message,
tool_calls=[],
),
]
with pytest.raises(Exception) as exc_info:
await adapter.run_shield(shield_id, messages)
# Verify the shield store was called
adapter.shield_store.get_shield.assert_called_once_with(shield_id)
# Verify the Guardrails API was called correctly
mock_guardrails_post.assert_called_once_with(
path="/v1/guardrail/checks",
data={
"model": shield_id,
"messages": [
{"role": "user", "content": "Hello, how are you?"},
{"role": "assistant", "content": "I'm doing well, thank you for asking!"},
],
"temperature": 1.0,
"top_p": 1,
"frequency_penalty": 0,
"presence_penalty": 0,
"max_tokens": 160,
"stream": False,
"guardrails": {
"config_id": "self-check",
},
)
# Verify the exception message
assert error_msg in str(context.exception)
},
)
# Verify the exception message
assert error_msg in str(exc_info.value)
def test_init_nemo_guardrails(self):
from llama_stack.providers.remote.safety.nvidia.nvidia import NeMoGuardrails
test_config_id = "test-custom-config-id"
config = NVIDIASafetyConfig(
guardrails_service_url=os.environ["NVIDIA_GUARDRAILS_URL"],
config_id=test_config_id,
)
# Initialize with default parameters
test_model = "test-model"
guardrails = NeMoGuardrails(config, test_model)
def test_init_nemo_guardrails():
from llama_stack.providers.remote.safety.nvidia.nvidia import NeMoGuardrails
# Verify the attributes are set correctly
assert guardrails.config_id == test_config_id
assert guardrails.model == test_model
assert guardrails.threshold == 0.9 # Default value
assert guardrails.temperature == 1.0 # Default value
assert guardrails.guardrails_service_url == os.environ["NVIDIA_GUARDRAILS_URL"]
os.environ["NVIDIA_GUARDRAILS_URL"] = "http://nemo.test"
# Initialize with custom parameters
guardrails = NeMoGuardrails(config, test_model, threshold=0.8, temperature=0.7)
test_config_id = "test-custom-config-id"
config = NVIDIASafetyConfig(
guardrails_service_url=os.environ["NVIDIA_GUARDRAILS_URL"],
config_id=test_config_id,
)
# Initialize with default parameters
test_model = "test-model"
guardrails = NeMoGuardrails(config, test_model)
# Verify the attributes are set correctly
assert guardrails.config_id == test_config_id
assert guardrails.model == test_model
assert guardrails.threshold == 0.8
assert guardrails.temperature == 0.7
assert guardrails.guardrails_service_url == os.environ["NVIDIA_GUARDRAILS_URL"]
# Verify the attributes are set correctly
assert guardrails.config_id == test_config_id
assert guardrails.model == test_model
assert guardrails.threshold == 0.9 # Default value
assert guardrails.temperature == 1.0 # Default value
assert guardrails.guardrails_service_url == os.environ["NVIDIA_GUARDRAILS_URL"]
def test_init_nemo_guardrails_invalid_temperature(self):
from llama_stack.providers.remote.safety.nvidia.nvidia import NeMoGuardrails
# Initialize with custom parameters
guardrails = NeMoGuardrails(config, test_model, threshold=0.8, temperature=0.7)
config = NVIDIASafetyConfig(
guardrails_service_url=os.environ["NVIDIA_GUARDRAILS_URL"],
config_id="test-custom-config-id",
)
with self.assertRaises(ValueError):
NeMoGuardrails(config, "test-model", temperature=0)
# Verify the attributes are set correctly
assert guardrails.config_id == test_config_id
assert guardrails.model == test_model
assert guardrails.threshold == 0.8
assert guardrails.temperature == 0.7
assert guardrails.guardrails_service_url == os.environ["NVIDIA_GUARDRAILS_URL"]
def test_init_nemo_guardrails_invalid_temperature():
from llama_stack.providers.remote.safety.nvidia.nvidia import NeMoGuardrails
os.environ["NVIDIA_GUARDRAILS_URL"] = "http://nemo.test"
config = NVIDIASafetyConfig(
guardrails_service_url=os.environ["NVIDIA_GUARDRAILS_URL"],
config_id="test-custom-config-id",
)
with pytest.raises(ValueError):
NeMoGuardrails(config, "test-model", temperature=0)
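
The rewrite replaces unittest.TestCase plus a run_async helper with bare async def tests, which only run if an async pytest plugin picks them up. One way a module could opt in explicitly (a sketch; the repo may instead enable asyncio_mode = "auto" globally in its pytest config, which is not shown in this diff):

import pytest

# Module-level marker (sketch): pytest-asyncio will then drive every bare
# `async def test_*` function in this file.
pytestmark = pytest.mark.asyncio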

@@ -8,6 +8,7 @@ import random
import numpy as np
import pytest
from chromadb import PersistentClient
from pymilvus import MilvusClient, connections
from llama_stack.apis.vector_dbs import VectorDB
@@ -18,7 +19,7 @@ from llama_stack.providers.inline.vector_io.faiss.faiss import FaissIndex, Faiss
from llama_stack.providers.inline.vector_io.milvus.config import MilvusVectorIOConfig, SqliteKVStoreConfig
from llama_stack.providers.inline.vector_io.sqlite_vec import SQLiteVectorIOConfig
from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import SQLiteVecIndex, SQLiteVecVectorIOAdapter
from llama_stack.providers.remote.vector_io.chroma.chroma import ChromaIndex, ChromaVectorIOAdapter
from llama_stack.providers.remote.vector_io.chroma.chroma import ChromaIndex, ChromaVectorIOAdapter, maybe_await
from llama_stack.providers.remote.vector_io.milvus.milvus import MilvusIndex, MilvusVectorIOAdapter
EMBEDDING_DIMENSION = 384
@@ -26,6 +27,11 @@ COLLECTION_PREFIX = "test_collection"
MILVUS_ALIAS = "test_milvus"
@pytest.fixture(params=["milvus", "sqlite_vec", "faiss", "chroma"])
def vector_provider(request):
return request.param
@pytest.fixture
def vector_db_id() -> str:
return f"test-vector-db-{random.randint(1, 100)}"
@@ -94,11 +100,6 @@ def sample_embeddings_with_metadata(sample_chunks_with_metadata):
return np.array([np.random.rand(EMBEDDING_DIMENSION).astype(np.float32) for _ in sample_chunks_with_metadata])
@pytest.fixture(params=["milvus", "sqlite_vec", "faiss"])
def vector_provider(request):
return request.param
@pytest.fixture(scope="session")
def mock_inference_api(embedding_dimension):
class MockInferenceAPI:
@@ -246,10 +247,10 @@ def chroma_vec_db_path(tmp_path_factory):
@pytest.fixture
async def chroma_vec_index(chroma_vec_db_path, embedding_dimension):
index = ChromaIndex(
embedding_dimension=embedding_dimension,
persist_directory=chroma_vec_db_path,
)
client = PersistentClient(path=chroma_vec_db_path)
name = f"{COLLECTION_PREFIX}_{np.random.randint(1e6)}"
collection = await maybe_await(client.get_or_create_collection(name))
index = ChromaIndex(client=client, collection=collection)
await index.initialize()
yield index
await index.delete()
@@ -257,7 +258,10 @@ async def chroma_vec_index(chroma_vec_db_path, embedding_dimension):
@pytest.fixture
async def chroma_vec_adapter(chroma_vec_db_path, mock_inference_api, embedding_dimension):
config = ChromaVectorIOConfig(persist_directory=chroma_vec_db_path)
config = ChromaVectorIOConfig(
db_path=chroma_vec_db_path,
kvstore=SqliteKVStoreConfig(),
)
adapter = ChromaVectorIOAdapter(
config=config,
inference_api=mock_inference_api,
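
The fixture now builds the Chroma collection itself and routes the call through maybe_await, since PersistentClient methods are synchronous while Chroma's HTTP client returns awaitables. A plausible implementation of such a helper (the real one lives in chroma.py and may differ):

import inspect

async def maybe_await(value):
    # Plausible helper, may differ from the real chroma.py: await only when
    # the underlying client handed back an awaitable.
    if inspect.isawaitable(value):
        return await value
    return value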

@@ -86,10 +86,14 @@ async def test_register_and_unregister_vector_db(vector_io_adapter):
assert dummy.identifier not in vector_io_adapter.cache
async def test_query_unregistered_raises(vector_io_adapter):
async def test_query_unregistered_raises(vector_io_adapter, vector_provider):
fake_emb = np.zeros(8, dtype=np.float32)
with pytest.raises(ValueError):
await vector_io_adapter.query_chunks("no_such_db", fake_emb)
if vector_provider == "chroma":
with pytest.raises(AttributeError):
await vector_io_adapter.query_chunks("no_such_db", fake_emb)
else:
with pytest.raises(ValueError):
await vector_io_adapter.query_chunks("no_such_db", fake_emb)
async def test_insert_chunks_calls_underlying_index(vector_io_adapter):

@@ -19,7 +19,8 @@ from llama_stack.distribution.datatypes import (
OAuth2JWKSConfig,
OAuth2TokenAuthConfig,
)
from llama_stack.distribution.server.auth import AuthenticationMiddleware
from llama_stack.distribution.request_headers import User
from llama_stack.distribution.server.auth import AuthenticationMiddleware, _has_required_scope
from llama_stack.distribution.server.auth_providers import (
get_attributes_from_claims,
)
@@ -73,7 +74,7 @@ def http_app(mock_auth_endpoint):
),
access_policy=[],
)
app.add_middleware(AuthenticationMiddleware, auth_config=auth_config)
app.add_middleware(AuthenticationMiddleware, auth_config=auth_config, impls={})
@app.get("/test")
def test_endpoint():
@@ -111,7 +112,50 @@ def mock_http_middleware(mock_auth_endpoint):
),
access_policy=[],
)
return AuthenticationMiddleware(mock_app, auth_config), mock_app
return AuthenticationMiddleware(mock_app, auth_config, {}), mock_app
@pytest.fixture
def mock_impls():
"""Mock implementations for scope testing"""
return {}
@pytest.fixture
def scope_middleware_with_mocks(mock_auth_endpoint):
"""Create AuthenticationMiddleware with mocked route implementations"""
mock_app = AsyncMock()
auth_config = AuthenticationConfig(
provider_config=CustomAuthConfig(
type=AuthProviderType.CUSTOM,
endpoint=mock_auth_endpoint,
),
access_policy=[],
)
middleware = AuthenticationMiddleware(mock_app, auth_config, {})
# Mock the route_impls to simulate finding routes with required scopes
from llama_stack.schema_utils import WebMethod
scoped_webmethod = WebMethod(route="/test/scoped", method="POST", required_scope="test.read")
public_webmethod = WebMethod(route="/test/public", method="GET")
# Mock the route finding logic
def mock_find_matching_route(method, path, route_impls):
if method == "POST" and path == "/test/scoped":
return None, {}, "/test/scoped", scoped_webmethod
elif method == "GET" and path == "/test/public":
return None, {}, "/test/public", public_webmethod
else:
raise ValueError("No matching route")
import llama_stack.distribution.server.auth
llama_stack.distribution.server.auth.find_matching_route = mock_find_matching_route
llama_stack.distribution.server.auth.initialize_route_impls = lambda impls: {}
return middleware, mock_app
async def mock_post_success(*args, **kwargs):
@@ -138,6 +182,36 @@ async def mock_post_exception(*args, **kwargs):
raise Exception("Connection error")
async def mock_post_success_with_scope(*args, **kwargs):
"""Mock auth response for user with test.read scope"""
return MockResponse(
200,
{
"message": "Authentication successful",
"principal": "test-user",
"attributes": {
"scopes": ["test.read", "other.scope"],
"roles": ["user"],
},
},
)
async def mock_post_success_no_scope(*args, **kwargs):
"""Mock auth response for user without required scope"""
return MockResponse(
200,
{
"message": "Authentication successful",
"principal": "test-user",
"attributes": {
"scopes": ["other.scope"],
"roles": ["user"],
},
},
)
# HTTP Endpoint Tests
def test_missing_auth_header(http_client):
response = http_client.get("/test")
@@ -252,7 +326,7 @@ def oauth2_app():
),
access_policy=[],
)
app.add_middleware(AuthenticationMiddleware, auth_config=auth_config)
app.add_middleware(AuthenticationMiddleware, auth_config=auth_config, impls={})
@app.get("/test")
def test_endpoint():
@@ -351,7 +425,7 @@ def oauth2_app_with_jwks_token():
),
access_policy=[],
)
app.add_middleware(AuthenticationMiddleware, auth_config=auth_config)
app.add_middleware(AuthenticationMiddleware, auth_config=auth_config, impls={})
@app.get("/test")
def test_endpoint():
@@ -442,7 +516,7 @@ def introspection_app(mock_introspection_endpoint):
),
access_policy=[],
)
app.add_middleware(AuthenticationMiddleware, auth_config=auth_config)
app.add_middleware(AuthenticationMiddleware, auth_config=auth_config, impls={})
@app.get("/test")
def test_endpoint():
@@ -472,7 +546,7 @@ def introspection_app_with_custom_mapping(mock_introspection_endpoint):
),
access_policy=[],
)
app.add_middleware(AuthenticationMiddleware, auth_config=auth_config)
app.add_middleware(AuthenticationMiddleware, auth_config=auth_config, impls={})
@app.get("/test")
def test_endpoint():
@@ -581,3 +655,122 @@ def test_valid_introspection_with_custom_mapping_authentication(
)
assert response.status_code == 200
assert response.json() == {"message": "Authentication successful"}
# Scope-based authorization tests
@patch("httpx.AsyncClient.post", new=mock_post_success_with_scope)
async def test_scope_authorization_success(scope_middleware_with_mocks, valid_api_key):
"""Test that user with required scope can access protected endpoint"""
middleware, mock_app = scope_middleware_with_mocks
mock_receive = AsyncMock()
mock_send = AsyncMock()
scope = {
"type": "http",
"path": "/test/scoped",
"method": "POST",
"headers": [(b"authorization", f"Bearer {valid_api_key}".encode())],
}
await middleware(scope, mock_receive, mock_send)
# Should call the downstream app (no 403 error sent)
mock_app.assert_called_once_with(scope, mock_receive, mock_send)
mock_send.assert_not_called()
@patch("httpx.AsyncClient.post", new=mock_post_success_no_scope)
async def test_scope_authorization_denied(scope_middleware_with_mocks, valid_api_key):
"""Test that user without required scope gets 403 access denied"""
middleware, mock_app = scope_middleware_with_mocks
mock_receive = AsyncMock()
mock_send = AsyncMock()
scope = {
"type": "http",
"path": "/test/scoped",
"method": "POST",
"headers": [(b"authorization", f"Bearer {valid_api_key}".encode())],
}
await middleware(scope, mock_receive, mock_send)
# Should send 403 error, not call downstream app
mock_app.assert_not_called()
assert mock_send.call_count == 2 # start + body
# Check the response
start_call = mock_send.call_args_list[0][0][0]
assert start_call["status"] == 403
body_call = mock_send.call_args_list[1][0][0]
body_text = body_call["body"].decode()
assert "Access denied" in body_text
assert "test.read" in body_text
@patch("httpx.AsyncClient.post", new=mock_post_success_no_scope)
async def test_public_endpoint_no_scope_required(scope_middleware_with_mocks, valid_api_key):
"""Test that public endpoints work without specific scopes"""
middleware, mock_app = scope_middleware_with_mocks
mock_receive = AsyncMock()
mock_send = AsyncMock()
scope = {
"type": "http",
"path": "/test/public",
"method": "GET",
"headers": [(b"authorization", f"Bearer {valid_api_key}".encode())],
}
await middleware(scope, mock_receive, mock_send)
# Should call the downstream app (no error)
mock_app.assert_called_once_with(scope, mock_receive, mock_send)
mock_send.assert_not_called()
async def test_scope_authorization_no_auth_disabled(scope_middleware_with_mocks):
"""Test that when auth is disabled (no user), scope checks are bypassed"""
middleware, mock_app = scope_middleware_with_mocks
mock_receive = AsyncMock()
mock_send = AsyncMock()
scope = {
"type": "http",
"path": "/test/scoped",
"method": "POST",
"headers": [], # No authorization header
}
await middleware(scope, mock_receive, mock_send)
# Should send 401 auth error, not call downstream app
mock_app.assert_not_called()
assert mock_send.call_count == 2 # start + body
# Check the response
start_call = mock_send.call_args_list[0][0][0]
assert start_call["status"] == 401
body_call = mock_send.call_args_list[1][0][0]
body_text = body_call["body"].decode()
assert "Authentication required" in body_text
def test_has_required_scope_function():
"""Test the _has_required_scope function directly"""
# Test user with required scope
user_with_scope = User(principal="test-user", attributes={"scopes": ["test.read", "other.scope"]})
assert _has_required_scope("test.read", user_with_scope)
# Test user without required scope
user_without_scope = User(principal="test-user", attributes={"scopes": ["other.scope"]})
assert not _has_required_scope("test.read", user_without_scope)
# Test user with no scopes attribute
user_no_scopes = User(principal="test-user", attributes={})
assert not _has_required_scope("test.read", user_no_scopes)
# Test no user (auth disabled)
assert _has_required_scope("test.read", None)
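
The last test pins down _has_required_scope's contract: a None user (auth disabled) passes, and otherwise the required scope must appear in user.attributes["scopes"]. A sketch consistent with those assertions, not necessarily the shipped implementation:

def _has_required_scope(required_scope: str, user) -> bool:
    # Sketch matching the tests above; not necessarily the shipped code.
    # No authenticated user means auth is disabled, so scope checks are bypassed.
    if user is None:
        return True
    scopes = (user.attributes or {}).get("scopes", [])
    return required_scope in scopes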