precommit

2025-10-03 19:57:35 +00:00 · 2025-10-03 11:11:32 -07:00 · 2025-10-03 11:11:32 -07:00 · cbe7391574
commit cbe7391574
parent 2a54a2433f
6 changed files with 35 additions and 38 deletions
--- a/docs/docs/providers/agents/index.mdx
+++ b/docs/docs/providers/agents/index.mdx
@@ -1,7 +1,7 @@
 ---
 description: "Agents
-    APIs for creating and interacting with agentic systems."
+APIs for creating and interacting with agentic systems."
 sidebar_label: Agents
 title: Agents
 ---
@@ -12,6 +12,6 @@ title: Agents
 Agents
-    APIs for creating and interacting with agentic systems.
+APIs for creating and interacting with agentic systems.
 This section contains documentation for all available providers for the **agents** API.
--- a/docs/docs/providers/batches/index.mdx
+++ b/docs/docs/providers/batches/index.mdx
@@ -1,14 +1,14 @@
 ---
 description: "The Batches API enables efficient processing of multiple requests in a single operation,
-    particularly useful for processing large datasets, batch evaluation workflows, and
+particularly useful for processing large datasets, batch evaluation workflows, and
-    cost-effective inference at scale.
+cost-effective inference at scale.
-    The API is designed to allow use of openai client libraries for seamless integration.
+The API is designed to allow use of openai client libraries for seamless integration.
-    This API provides the following extensions:
+This API provides the following extensions:
-     - idempotent batch creation
+ - idempotent batch creation
-    Note: This API is currently under active development and may undergo changes."
+Note: This API is currently under active development and may undergo changes."
 sidebar_label: Batches
 title: Batches
 ---
@@ -18,14 +18,14 @@ title: Batches
 ## Overview
 The Batches API enables efficient processing of multiple requests in a single operation,
-    particularly useful for processing large datasets, batch evaluation workflows, and
+particularly useful for processing large datasets, batch evaluation workflows, and
-    cost-effective inference at scale.
+cost-effective inference at scale.
-    The API is designed to allow use of openai client libraries for seamless integration.
+The API is designed to allow use of openai client libraries for seamless integration.
-    This API provides the following extensions:
+This API provides the following extensions:
-     - idempotent batch creation
+ - idempotent batch creation
-    Note: This API is currently under active development and may undergo changes.
+Note: This API is currently under active development and may undergo changes.
 This section contains documentation for all available providers for the **batches** API.
--- a/docs/docs/providers/inference/index.mdx
+++ b/docs/docs/providers/inference/index.mdx
@@ -1,9 +1,9 @@
 ---
 description: "Llama Stack Inference API for generating completions, chat completions, and embeddings.
-    This API provides the raw interface to the underlying models. Two kinds of models are supported:
+This API provides the raw interface to the underlying models. Two kinds of models are supported:
-    - LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.
+- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.
-    - Embedding models: these models generate embeddings to be used for semantic search."
+- Embedding models: these models generate embeddings to be used for semantic search."
 sidebar_label: Inference
 title: Inference
 ---
@@ -14,8 +14,8 @@ title: Inference
 Llama Stack Inference API for generating completions, chat completions, and embeddings.
-    This API provides the raw interface to the underlying models. Two kinds of models are supported:
+This API provides the raw interface to the underlying models. Two kinds of models are supported:
-    - LLM models: these models generate "raw" and "chat" (conversational) completions.
+- LLM models: these models generate "raw" and "chat" (conversational) completions.
-    - Embedding models: these models generate embeddings to be used for semantic search.
+- Embedding models: these models generate embeddings to be used for semantic search.
 This section contains documentation for all available providers for the **inference** API.
--- a/llama_stack/apis/agents/agents.py
+++ b/llama_stack/apis/agents/agents.py
@@ -819,7 +819,12 @@ class Agents(Protocol):
        tools: list[OpenAIResponseInputTool] | None = None,
        include: list[str] | None = None,
        max_infer_iters: int | None = 10,  # this is an extension to the OpenAI API
-        shields: Annotated[list[ResponseShield] | None, ExtraBodyField("List of shields to apply during response generation. Shields provide safety and content moderation.")] = None,
+        shields: Annotated[
+            list[ResponseShield] | None,
+            ExtraBodyField(
+                "List of shields to apply during response generation. Shields provide safety and content moderation."
+            ),
+        ] = None,
    ) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]:
        """Create a new OpenAI response.
--- a/llama_stack/schema_utils.py
+++ b/llama_stack/schema_utils.py
@@ -6,15 +6,12 @@
 from collections.abc import Callable
 from dataclasses import dataclass
-from typing import Any, Generic, TypeVar
+from typing import Any
 from .strong_typing.schema import json_schema_type, register_schema  # noqa: F401
-T = TypeVar("T")
+class ExtraBodyField[T]:
-class ExtraBodyField(Generic[T]):
    """
    Marker annotation for parameters that arrive via extra_body in the client SDK.
@@ -30,7 +27,9 @@ class ExtraBodyField(Generic[T]):
            self,
            input: str,
            model: str,
-            shields: Annotated[list[str] | None, ExtraBodyField("List of shields to apply")] = None,
+            shields: Annotated[
+                list[str] | None, ExtraBodyField("List of shields to apply")
+            ] = None,
        ) -> ResponseObject:
            # shields is available here with proper typing
            if shields:
@@ -40,12 +39,11 @@ class ExtraBodyField(Generic[T]):
        Client usage:
        ```python
        client.responses.create(
-            input="hello",
+            input="hello", model="llama-3", extra_body={"shields": ["shield-1"]}
-            model="llama-3",
-            extra_body={"shields": ["shield-1"]}
        )
        ```
    """
    def __init__(self, description: str | None = None):
        self.description = description
--- a/tests/integration/responses/test_extra_body_shields.py
+++ b/tests/integration/responses/test_extra_body_shields.py
@@ -25,9 +25,7 @@ def test_shields_via_extra_body(compat_client, text_model_id):
            model=text_model_id,
            input="What is the capital of France?",
            stream=False,
-            extra_body={
+            extra_body={"shields": ["test-shield-1", "test-shield-2"]},
-                "shields": ["test-shield-1", "test-shield-2"]
-            }
        )
    # Verify the error message indicates shields are not implemented
@@ -35,8 +33,6 @@ def test_shields_via_extra_body(compat_client, text_model_id):
    assert "not yet implemented" in error_message.lower() or "not implemented" in error_message.lower()
 def test_response_without_shields_still_works(compat_client, text_model_id):
    """Test that responses still work without shields parameter (backwards compatibility)."""
@@ -70,9 +66,7 @@ def test_shields_parameter_received_end_to_end(compat_client, text_model_id):
            model=text_model_id,
            input="Test message for shields verification",
            stream=False,
-            extra_body={
+            extra_body={"shields": ["shield-1", "shield-2"]},
                "shields": ["shield-1", "shield-2"]
            }
        )
    # The NotImplementedError proves that: