diff --git a/docs/docs/providers/agents/index.mdx b/docs/docs/providers/agents/index.mdx index 06eb104af..52b92734e 100644 --- a/docs/docs/providers/agents/index.mdx +++ b/docs/docs/providers/agents/index.mdx @@ -1,7 +1,7 @@ --- description: "Agents - APIs for creating and interacting with agentic systems." +APIs for creating and interacting with agentic systems." sidebar_label: Agents title: Agents --- @@ -12,6 +12,6 @@ title: Agents Agents - APIs for creating and interacting with agentic systems. +APIs for creating and interacting with agentic systems. This section contains documentation for all available providers for the **agents** API. diff --git a/docs/docs/providers/batches/index.mdx b/docs/docs/providers/batches/index.mdx index 2c64b277f..18e5e314d 100644 --- a/docs/docs/providers/batches/index.mdx +++ b/docs/docs/providers/batches/index.mdx @@ -1,14 +1,14 @@ --- description: "The Batches API enables efficient processing of multiple requests in a single operation, - particularly useful for processing large datasets, batch evaluation workflows, and - cost-effective inference at scale. +particularly useful for processing large datasets, batch evaluation workflows, and +cost-effective inference at scale. - The API is designed to allow use of openai client libraries for seamless integration. +The API is designed to allow use of openai client libraries for seamless integration. - This API provides the following extensions: - - idempotent batch creation +This API provides the following extensions: + - idempotent batch creation - Note: This API is currently under active development and may undergo changes." +Note: This API is currently under active development and may undergo changes." sidebar_label: Batches title: Batches --- @@ -18,14 +18,14 @@ title: Batches ## Overview The Batches API enables efficient processing of multiple requests in a single operation, - particularly useful for processing large datasets, batch evaluation workflows, and - cost-effective inference at scale. +particularly useful for processing large datasets, batch evaluation workflows, and +cost-effective inference at scale. - The API is designed to allow use of openai client libraries for seamless integration. +The API is designed to allow use of openai client libraries for seamless integration. - This API provides the following extensions: - - idempotent batch creation +This API provides the following extensions: + - idempotent batch creation - Note: This API is currently under active development and may undergo changes. +Note: This API is currently under active development and may undergo changes. This section contains documentation for all available providers for the **batches** API. diff --git a/docs/docs/providers/inference/index.mdx b/docs/docs/providers/inference/index.mdx index ebbaf1be1..1dc479675 100644 --- a/docs/docs/providers/inference/index.mdx +++ b/docs/docs/providers/inference/index.mdx @@ -1,9 +1,9 @@ --- description: "Llama Stack Inference API for generating completions, chat completions, and embeddings. - This API provides the raw interface to the underlying models. Two kinds of models are supported: - - LLM models: these models generate \"raw\" and \"chat\" (conversational) completions. - - Embedding models: these models generate embeddings to be used for semantic search." +This API provides the raw interface to the underlying models. Two kinds of models are supported: +- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions. +- Embedding models: these models generate embeddings to be used for semantic search." sidebar_label: Inference title: Inference --- @@ -14,8 +14,8 @@ title: Inference Llama Stack Inference API for generating completions, chat completions, and embeddings. - This API provides the raw interface to the underlying models. Two kinds of models are supported: - - LLM models: these models generate "raw" and "chat" (conversational) completions. - - Embedding models: these models generate embeddings to be used for semantic search. +This API provides the raw interface to the underlying models. Two kinds of models are supported: +- LLM models: these models generate "raw" and "chat" (conversational) completions. +- Embedding models: these models generate embeddings to be used for semantic search. This section contains documentation for all available providers for the **inference** API. diff --git a/llama_stack/apis/agents/agents.py b/llama_stack/apis/agents/agents.py index 8be36b92f..cdf47308e 100644 --- a/llama_stack/apis/agents/agents.py +++ b/llama_stack/apis/agents/agents.py @@ -819,7 +819,12 @@ class Agents(Protocol): tools: list[OpenAIResponseInputTool] | None = None, include: list[str] | None = None, max_infer_iters: int | None = 10, # this is an extension to the OpenAI API - shields: Annotated[list[ResponseShield] | None, ExtraBodyField("List of shields to apply during response generation. Shields provide safety and content moderation.")] = None, + shields: Annotated[ + list[ResponseShield] | None, + ExtraBodyField( + "List of shields to apply during response generation. Shields provide safety and content moderation." + ), + ] = None, ) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]: """Create a new OpenAI response. diff --git a/llama_stack/schema_utils.py b/llama_stack/schema_utils.py index f3c0b5942..8e6c53cc7 100644 --- a/llama_stack/schema_utils.py +++ b/llama_stack/schema_utils.py @@ -6,15 +6,12 @@ from collections.abc import Callable from dataclasses import dataclass -from typing import Any, Generic, TypeVar +from typing import Any from .strong_typing.schema import json_schema_type, register_schema # noqa: F401 -T = TypeVar("T") - - -class ExtraBodyField(Generic[T]): +class ExtraBodyField[T]: """ Marker annotation for parameters that arrive via extra_body in the client SDK. @@ -30,7 +27,9 @@ class ExtraBodyField(Generic[T]): self, input: str, model: str, - shields: Annotated[list[str] | None, ExtraBodyField("List of shields to apply")] = None, + shields: Annotated[ + list[str] | None, ExtraBodyField("List of shields to apply") + ] = None, ) -> ResponseObject: # shields is available here with proper typing if shields: @@ -40,12 +39,11 @@ class ExtraBodyField(Generic[T]): Client usage: ```python client.responses.create( - input="hello", - model="llama-3", - extra_body={"shields": ["shield-1"]} + input="hello", model="llama-3", extra_body={"shields": ["shield-1"]} ) ``` """ + def __init__(self, description: str | None = None): self.description = description diff --git a/tests/integration/responses/test_extra_body_shields.py b/tests/integration/responses/test_extra_body_shields.py index b0c6ec39a..f20cd24ba 100644 --- a/tests/integration/responses/test_extra_body_shields.py +++ b/tests/integration/responses/test_extra_body_shields.py @@ -25,9 +25,7 @@ def test_shields_via_extra_body(compat_client, text_model_id): model=text_model_id, input="What is the capital of France?", stream=False, - extra_body={ - "shields": ["test-shield-1", "test-shield-2"] - } + extra_body={"shields": ["test-shield-1", "test-shield-2"]}, ) # Verify the error message indicates shields are not implemented @@ -35,8 +33,6 @@ def test_shields_via_extra_body(compat_client, text_model_id): assert "not yet implemented" in error_message.lower() or "not implemented" in error_message.lower() - - def test_response_without_shields_still_works(compat_client, text_model_id): """Test that responses still work without shields parameter (backwards compatibility).""" @@ -70,9 +66,7 @@ def test_shields_parameter_received_end_to_end(compat_client, text_model_id): model=text_model_id, input="Test message for shields verification", stream=False, - extra_body={ - "shields": ["shield-1", "shield-2"] - } + extra_body={"shields": ["shield-1", "shield-2"]}, ) # The NotImplementedError proves that: