feat(api): add extra_body parameter support with shields example (#3670)

## Summary
Introduce `ExtraBodyField` annotation to enable parameters that arrive
via extra_body in client SDKs but are accessible server-side with full
typing.

These parameters are documented in OpenAPI specs under
**`x-llama-stack-extra-body-params`** but excluded from generated SDK
signatures.

Add `shields` parameter to `create_openai_response` as the first
implementation using this pattern.

## Test Plan
- Added an integration test that checks that the `shields` parameter passed
via `extra_body` reaches the server implementation.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

---------

Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
Ashwin Bharambe 2025-10-03 13:25:09 -07:00 committed by GitHub
parent 188a56af5c
commit 61b4238912
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
16 changed files with 367 additions and 12 deletions

View file

@ -28,7 +28,7 @@ from llama_stack.apis.inference import (
from llama_stack.apis.safety import SafetyViolation
from llama_stack.apis.tools import ToolDef
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
from llama_stack.schema_utils import ExtraBodyField, json_schema_type, register_schema, webmethod
from .openai_responses import (
ListOpenAIResponseInputItem,
@ -42,6 +42,20 @@ from .openai_responses import (
)
@json_schema_type
class ResponseShieldSpec(BaseModel):
    """Specification for a shield to apply during response generation.

    :param type: The type/identifier of the shield.
    """

    # Shield type/identifier; currently the only field carried by the spec.
    type: str
    # TODO: more fields to be added for shield configuration
# A shield reference is either a plain string or a full ResponseShieldSpec.
ResponseShield = str | ResponseShieldSpec
class Attachment(BaseModel):
"""An attachment to an agent turn.
@ -805,6 +819,12 @@ class Agents(Protocol):
tools: list[OpenAIResponseInputTool] | None = None,
include: list[str] | None = None,
max_infer_iters: int | None = 10, # this is an extension to the OpenAI API
shields: Annotated[
list[ResponseShield] | None,
ExtraBodyField(
"List of shields to apply during response generation. Shields provide safety and content moderation."
),
] = None,
) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]:
"""Create a new OpenAI response.
@ -812,6 +832,7 @@ class Agents(Protocol):
:param model: The underlying LLM used for completions.
:param previous_response_id: (Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses.
:param include: (Optional) Additional fields to include in the response.
:param shields: (Optional) List of shields to apply during response generation. Can be shield IDs (strings) or shield specifications.
:returns: An OpenAIResponseObject.
"""
...

View file

@ -374,6 +374,10 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
body = options.params or {}
body |= options.json_data or {}
# Merge extra_json parameters (extra_body from SDK is converted to extra_json)
if hasattr(options, "extra_json") and options.extra_json:
body |= options.extra_json
matched_func, path_params, route_path, webmethod = find_matching_route(options.method, path, self.route_impls)
body |= path_params

View file

@ -329,6 +329,7 @@ class MetaReferenceAgentsImpl(Agents):
tools: list[OpenAIResponseInputTool] | None = None,
include: list[str] | None = None,
max_infer_iters: int | None = 10,
shields: list | None = None,
) -> OpenAIResponseObject:
return await self.openai_responses_impl.create_openai_response(
input,
@ -342,6 +343,7 @@ class MetaReferenceAgentsImpl(Agents):
tools,
include,
max_infer_iters,
shields,
)
async def list_openai_responses(

View file

@ -208,10 +208,15 @@ class OpenAIResponsesImpl:
tools: list[OpenAIResponseInputTool] | None = None,
include: list[str] | None = None,
max_infer_iters: int | None = 10,
shields: list | None = None,
):
stream = bool(stream)
text = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")) if text is None else text
# Shields parameter received via extra_body - not yet implemented
if shields is not None:
raise NotImplementedError("Shields parameter is not yet implemented in the meta-reference provider")
stream_gen = self._create_streaming_response(
input=input,
model=model,

View file

@ -11,6 +11,43 @@ from typing import Any, TypeVar
from .strong_typing.schema import json_schema_type, register_schema # noqa: F401
class ExtraBodyField[T]:
"""
Marker annotation for parameters that arrive via extra_body in the client SDK.
These parameters:
- Will NOT appear in the generated client SDK method signature
- WILL be documented in OpenAPI spec under x-llama-stack-extra-body-params
- MUST be passed via the extra_body parameter in client SDK calls
- WILL be available in server-side method signature with proper typing
Example:
```python
async def create_openai_response(
self,
input: str,
model: str,
shields: Annotated[
list[str] | None, ExtraBodyField("List of shields to apply")
] = None,
) -> ResponseObject:
# shields is available here with proper typing
if shields:
print(f"Using shields: {shields}")
```
Client usage:
```python
client.responses.create(
input="hello", model="llama-3", extra_body={"shields": ["shield-1"]}
)
```
"""
def __init__(self, description: str | None = None):
self.description = description
@dataclass
class WebMethod:
level: str | None = None
@ -26,7 +63,7 @@ class WebMethod:
deprecated: bool | None = False
T = TypeVar("T", bound=Callable[..., Any])
CallableT = TypeVar("CallableT", bound=Callable[..., Any])
def webmethod(
@ -40,7 +77,7 @@ def webmethod(
descriptive_name: str | None = None,
required_scope: str | None = None,
deprecated: bool | None = False,
) -> Callable[[T], T]:
) -> Callable[[CallableT], CallableT]:
"""
Decorator that supplies additional metadata to an endpoint operation function.
@ -51,7 +88,7 @@ def webmethod(
:param required_scope: Required scope for this endpoint (e.g., 'monitoring.viewer').
"""
def wrap(func: T) -> T:
def wrap(func: CallableT) -> CallableT:
webmethod_obj = WebMethod(
route=route,
method=method,