Merge branch 'main' into fix/issue-3797-metadata-validation

2025-12-06 10:37:22 +00:00 · 2025-11-19 10:09:30 -08:00 · 2025-11-19 10:09:30 -08:00 · 0358770791
commit 0358770791
parent aeb3f80966 1e4e02e622
90 changed files with 6769 additions and 247 deletions
--- a/tests/unit/providers/agents/meta_reference/test_safety_optional.py
+++ b/tests/unit/providers/agents/meta_reference/test_safety_optional.py
@ -0,0 +1,206 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""Tests for making Safety API optional in meta-reference agents provider.
+
+This test suite validates the changes introduced to fix issue #4165, which
+allows running the meta-reference agents provider without the Safety API.
+Safety API is now an optional dependency, and errors are raised at request time
+when guardrails are explicitly requested without Safety API configured.
+"""
+
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from llama_stack.core.datatypes import Api
+from llama_stack.core.storage.datatypes import KVStoreReference, ResponsesStoreReference
+from llama_stack.providers.inline.agents.meta_reference import get_provider_impl
+from llama_stack.providers.inline.agents.meta_reference.config import (
+    AgentPersistenceConfig,
+    MetaReferenceAgentsImplConfig,
+)
+from llama_stack.providers.inline.agents.meta_reference.responses.utils import (
+    run_guardrails,
+)
+
+
+@pytest.fixture
+def mock_persistence_config():
+    """Return an AgentPersistenceConfig pointing at the default KV and SQL backends.
+
+    Agent state is referenced through the ``kv_default`` backend under the
+    ``agents`` namespace; responses are referenced through the ``sql_default``
+    backend in the ``responses`` table.
+    """
+    agent_state_ref = KVStoreReference(backend="kv_default", namespace="agents")
+    responses_ref = ResponsesStoreReference(backend="sql_default", table_name="responses")
+    return AgentPersistenceConfig(agent_state=agent_state_ref, responses=responses_ref)
+
+
+@pytest.fixture
+def mock_deps():
+    """Return the provider dependency mapping with one AsyncMock per API.
+
+    The mapping contains no ``Api.safety`` entry; a test that needs the Safety
+    API adds it to the returned dict itself.
+    """
+    required_apis = (
+        Api.inference,
+        Api.vector_io,
+        Api.tool_runtime,
+        Api.tool_groups,
+        Api.conversations,
+    )
+    # A separate AsyncMock per key keeps the call records of each API apart.
+    return {api: AsyncMock() for api in required_apis}
+
+
+class TestProviderInitialization:
+    """Provider start-up with and without a Safety API among the dependencies."""
+
+    # Dotted path of the coroutine that is stubbed so no real initialization runs.
+    _INITIALIZE_TARGET = (
+        "llama_stack.providers.inline.agents.meta_reference.agents.MetaReferenceAgentsImpl.initialize"
+    )
+
+    async def test_initialization_with_safety_api_present(self, mock_persistence_config, mock_deps):
+        """The provider is created when ``Api.safety`` is part of the dependencies."""
+        mock_deps[Api.safety] = AsyncMock()
+        config = MetaReferenceAgentsImplConfig(persistence=mock_persistence_config)
+
+        with patch(self._INITIALIZE_TARGET, new_callable=AsyncMock):
+            # Any exception raised while building the provider fails the test.
+            provider = await get_provider_impl(config, mock_deps, policy=[], telemetry_enabled=False)
+
+        assert provider is not None
+
+    async def test_initialization_without_safety_api(self, mock_persistence_config, mock_deps):
+        """The provider is created, with ``safety_api`` set to None, when ``Api.safety`` is absent."""
+        config = MetaReferenceAgentsImplConfig(persistence=mock_persistence_config)
+
+        # mock_deps has no Api.safety entry; building the provider must still succeed.
+        with patch(self._INITIALIZE_TARGET, new_callable=AsyncMock):
+            provider = await get_provider_impl(config, mock_deps, policy=[], telemetry_enabled=False)
+
+        assert provider is not None
+        assert provider.safety_api is None
+
+
+class TestGuardrailsFunctionality:
+    """Guardrail handling in run_guardrails and create_openai_response without a Safety API."""
+
+    # Text expected in the error raised when guardrails are requested without a Safety API.
+    _MISSING_SAFETY_ERROR = "Cannot process guardrails: Safety API is not configured"
+    # Patched while an OpenAIResponsesImpl is built and exercised.
+    _RESPONSES_STORE_TARGET = (
+        "llama_stack.providers.inline.agents.meta_reference.responses.openai_responses.ResponsesStore"
+    )
+
+    @staticmethod
+    def _impl_without_safety(deps):
+        """Build an OpenAIResponsesImpl from the mocks in ``deps`` with ``safety_api=None``."""
+        from llama_stack.providers.inline.agents.meta_reference.responses.openai_responses import (
+            OpenAIResponsesImpl,
+        )
+
+        return OpenAIResponsesImpl(
+            inference_api=deps[Api.inference],
+            tool_groups_api=deps[Api.tool_groups],
+            tool_runtime_api=deps[Api.tool_runtime],
+            responses_store=MagicMock(),
+            vector_io_api=deps[Api.vector_io],
+            safety_api=None,
+            conversations_api=deps[Api.conversations],
+        )
+
+    async def test_run_guardrails_with_none_safety_api(self):
+        """run_guardrails returns None when ``safety_api`` is None."""
+        outcome = await run_guardrails(safety_api=None, messages="test message", guardrail_ids=["llama-guard"])
+        assert outcome is None
+
+    async def test_run_guardrails_with_empty_messages(self):
+        """run_guardrails returns None for an empty message, with or without a Safety API."""
+        # First without a Safety API, then with a mocked one.
+        for safety_api in (None, AsyncMock()):
+            outcome = await run_guardrails(safety_api=safety_api, messages="", guardrail_ids=["llama-guard"])
+            assert outcome is None
+
+    async def test_run_guardrails_with_none_safety_api_ignores_guardrails(self):
+        """With ``safety_api=None`` the call returns None instead of raising, even with several guardrail ids."""
+        requested_ids = ["llama-guard", "content-filter"]
+        outcome = await run_guardrails(
+            safety_api=None,
+            messages="potentially harmful content",
+            guardrail_ids=requested_ids,
+        )
+        assert outcome is None
+
+    async def test_create_response_rejects_guardrails_without_safety_api(self, mock_persistence_config, mock_deps):
+        """create_openai_response raises ValueError when guardrails are requested without a Safety API."""
+        from llama_stack_api import ResponseGuardrailSpec
+
+        with patch(self._RESPONSES_STORE_TARGET):
+            impl = self._impl_without_safety(mock_deps)
+
+            # Guardrails may be given as plain ids or as spec objects; both forms must be rejected.
+            for requested in (["llama-guard"], [ResponseGuardrailSpec(type="llama-guard")]):
+                with pytest.raises(ValueError, match=self._MISSING_SAFETY_ERROR):
+                    await impl.create_openai_response(
+                        input="test input",
+                        model="test-model",
+                        guardrails=requested,
+                    )
+
+    async def test_create_response_succeeds_without_guardrails_and_no_safety_api(
+        self, mock_persistence_config, mock_deps
+    ):
+        """Without guardrails, create_openai_response is not rejected for a missing Safety API."""
+        from llama_stack.providers.inline.agents.meta_reference.responses.openai_responses import (
+            OpenAIResponsesImpl,
+        )
+
+        async def single_event_stream():
+            yield MagicMock()
+
+        with (
+            patch(self._RESPONSES_STORE_TARGET),
+            patch.object(OpenAIResponsesImpl, "_create_streaming_response", new_callable=AsyncMock) as stream_mock,
+        ):
+            # The stub's return value is a one-item async generator.
+            stream_mock.return_value = single_event_stream()
+            impl = self._impl_without_safety(mock_deps)
+
+            # The call may still fail further on because of the mocking; the only
+            # requirement is that such a failure is not the missing-Safety-API rejection.
+            failure_text = ""
+            try:
+                await impl.create_openai_response(
+                    input="test input",
+                    model="test-model",
+                    guardrails=None,
+                )
+            except Exception as exc:
+                failure_text = str(exc)
+            assert self._MISSING_SAFETY_ERROR not in failure_text
--- a/tests/unit/providers/inference/test_inference_client_caching.py
+++ b/tests/unit/providers/inference/test_inference_client_caching.py
@ -120,7 +120,7 @@ from llama_stack.providers.remote.inference.watsonx.watsonx import WatsonXInfere
            VLLMInferenceAdapter,
            "llama_stack.providers.remote.inference.vllm.VLLMProviderDataValidator",
            {
-                "url": "http://fake",
+                "base_url": "http://fake",
            },
        ),
    ],
@ -153,7 +153,7 @@ def test_litellm_provider_data_used(config_cls, adapter_cls, provider_data_valid
    """Validate data for LiteLLM-based providers.  Similar to test_openai_provider_data_used, but without the
    assumption that there is an OpenAI-compatible client object."""

-    inference_adapter = adapter_cls(config=config_cls())
+    inference_adapter = adapter_cls(config=config_cls(base_url="http://fake"))

    inference_adapter.__provider_spec__ = MagicMock()
    inference_adapter.__provider_spec__.provider_data_validator = provider_data_validator
--- a/tests/unit/providers/inference/test_remote_vllm.py
+++ b/tests/unit/providers/inference/test_remote_vllm.py
@ -40,7 +40,7 @@ from llama_stack_api import (

@pytest.fixture(scope="function")
 async def vllm_inference_adapter():
-    config = VLLMInferenceAdapterConfig(url="http://mocked.localhost:12345")
+    config = VLLMInferenceAdapterConfig(base_url="http://mocked.localhost:12345")
    inference_adapter = VLLMInferenceAdapter(config=config)
    inference_adapter.model_store = AsyncMock()
    await inference_adapter.initialize()
@ -204,7 +204,7 @@ async def test_vllm_completion_extra_body():
    via extra_body to the underlying OpenAI client through the InferenceRouter.
    """
    # Set up the vLLM adapter
-    config = VLLMInferenceAdapterConfig(url="http://mocked.localhost:12345")
+    config = VLLMInferenceAdapterConfig(base_url="http://mocked.localhost:12345")
    vllm_adapter = VLLMInferenceAdapter(config=config)
    vllm_adapter.__provider_id__ = "vllm"
    await vllm_adapter.initialize()
@ -277,7 +277,7 @@ async def test_vllm_chat_completion_extra_body():
    via extra_body to the underlying OpenAI client through the InferenceRouter for chat completion.
    """
    # Set up the vLLM adapter
-    config = VLLMInferenceAdapterConfig(url="http://mocked.localhost:12345")
+    config = VLLMInferenceAdapterConfig(base_url="http://mocked.localhost:12345")
    vllm_adapter = VLLMInferenceAdapter(config=config)
    vllm_adapter.__provider_id__ = "vllm"
    await vllm_adapter.initialize()
--- a/tests/unit/providers/nvidia/test_rerank_inference.py
+++ b/tests/unit/providers/nvidia/test_rerank_inference.py
@ -146,7 +146,7 @@ async def test_hosted_model_not_in_endpoint_mapping():

 async def test_self_hosted_ignores_endpoint():
    adapter = create_adapter(
-        config=NVIDIAConfig(url="http://localhost:8000", api_key=None),
+        config=NVIDIAConfig(base_url="http://localhost:8000", api_key=None),
        rerank_endpoints={"test-model": "https://model.endpoint/rerank"},  # This should be ignored for self-hosted.
    )
    mock_session = MockSession(MockResponse())
--- a/tests/unit/providers/test_configs.py
+++ b/tests/unit/providers/test_configs.py
@ -4,8 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

+from typing import get_args, get_origin
+
 import pytest
-from pydantic import BaseModel
+from pydantic import BaseModel, HttpUrl

 from llama_stack.core.distribution import get_provider_registry, providable_apis
 from llama_stack.core.utils.dynamic import instantiate_class_type
@ -41,3 +43,55 @@ class TestProviderConfigurations:

        sample_config = config_type.sample_run_config(__distro_dir__="foobarbaz")
        assert isinstance(sample_config, dict), f"{config_class_name}.sample_run_config() did not return a dict"
+
+    def test_remote_inference_url_standardization(self):
+        """Verify remote inference provider configs follow the ``base_url`` convention.
+
+        For every ``remote::`` inference provider whose config class exposes
+        ``model_fields``, two things are checked: there is no field named ``url``,
+        and a ``base_url`` field, when present, is annotated as ``HttpUrl`` or as a
+        union that includes ``HttpUrl`` (for example ``HttpUrl | None``). Configs
+        without a ``base_url`` field are not flagged. Every violation, and any error
+        raised while inspecting a provider, is collected so that a single failure
+        message lists them all.
+        """
+        # Local imports: these names are needed only by the union check below.
+        import types
+        from typing import Union
+
+        # ``X | Y`` has origin types.UnionType; ``Optional[X]`` / ``Union[X, Y]`` have typing.Union.
+        union_origins = (Union, types.UnionType)
+
+        def accepts_http_url(annotation):
+            """Return True for ``HttpUrl`` itself or a union that has ``HttpUrl`` as a member."""
+            if annotation == HttpUrl:
+                return True
+            # Only real unions qualify: other generics such as list[HttpUrl] also have a
+            # non-None origin and HttpUrl among their args, but do not describe a URL field.
+            return get_origin(annotation) in union_origins and HttpUrl in get_args(annotation)
+
+        provider_registry = get_provider_registry()
+        inference_providers = provider_registry.get("inference", {})
+
+        failures = []
+        for provider_type, provider_spec in inference_providers.items():
+            # The convention applies to remote providers only.
+            if not provider_type.startswith("remote::"):
+                continue
+
+            try:
+                config_type = instantiate_class_type(provider_spec.config_class)
+
+                # Config classes without model_fields cannot be inspected and are skipped.
+                if not hasattr(config_type, "model_fields"):
+                    continue
+                fields = config_type.model_fields
+
+                # Should NOT have 'url' field (old pattern)
+                if "url" in fields:
+                    failures.append(
+                        f"{provider_type}: Uses deprecated 'url' field instead of 'base_url'. "
+                        f"Please rename to 'base_url' for consistency."
+                    )
+
+                # A 'base_url' field, when present, must be typed HttpUrl or a union containing it.
+                if "base_url" in fields:
+                    annotation = fields["base_url"].annotation
+                    if not accepts_http_url(annotation):
+                        failures.append(
+                            f"{provider_type}: base_url field has incorrect type annotation. "
+                            f"Expected 'HttpUrl | None', got '{annotation}'"
+                        )
+
+            except Exception as e:
+                # Record the problem and keep going so the remaining providers are still checked.
+                failures.append(f"{provider_type}: Error checking URL standardization: {e}")
+
+        if failures:
+            pytest.fail("URL standardization violations found:\n" + "\n".join(f"  - {f}" for f in failures))