Apply pre-commit fixes (trailing whitespace and formatting cleanup)

This commit is contained in:
Ashwin Bharambe 2025-10-03 11:11:32 -07:00
parent 2a54a2433f
commit cbe7391574
6 changed files with 35 additions and 38 deletions

View file

@ -1,7 +1,7 @@
---
description: "Agents
APIs for creating and interacting with agentic systems."
APIs for creating and interacting with agentic systems."
sidebar_label: Agents
title: Agents
---
@ -12,6 +12,6 @@ title: Agents
Agents
APIs for creating and interacting with agentic systems.
APIs for creating and interacting with agentic systems.
This section contains documentation for all available providers for the **agents** API.

View file

@ -1,14 +1,14 @@
---
description: "The Batches API enables efficient processing of multiple requests in a single operation,
particularly useful for processing large datasets, batch evaluation workflows, and
cost-effective inference at scale.
particularly useful for processing large datasets, batch evaluation workflows, and
cost-effective inference at scale.
The API is designed to allow use of openai client libraries for seamless integration.
The API is designed to allow use of openai client libraries for seamless integration.
This API provides the following extensions:
- idempotent batch creation
This API provides the following extensions:
- idempotent batch creation
Note: This API is currently under active development and may undergo changes."
Note: This API is currently under active development and may undergo changes."
sidebar_label: Batches
title: Batches
---
@ -18,14 +18,14 @@ title: Batches
## Overview
The Batches API enables efficient processing of multiple requests in a single operation,
particularly useful for processing large datasets, batch evaluation workflows, and
cost-effective inference at scale.
particularly useful for processing large datasets, batch evaluation workflows, and
cost-effective inference at scale.
The API is designed to allow use of openai client libraries for seamless integration.
The API is designed to allow use of openai client libraries for seamless integration.
This API provides the following extensions:
- idempotent batch creation
This API provides the following extensions:
- idempotent batch creation
Note: This API is currently under active development and may undergo changes.
Note: This API is currently under active development and may undergo changes.
This section contains documentation for all available providers for the **batches** API.

View file

@ -1,9 +1,9 @@
---
description: "Llama Stack Inference API for generating completions, chat completions, and embeddings.
This API provides the raw interface to the underlying models. Two kinds of models are supported:
- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.
- Embedding models: these models generate embeddings to be used for semantic search."
This API provides the raw interface to the underlying models. Two kinds of models are supported:
- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.
- Embedding models: these models generate embeddings to be used for semantic search."
sidebar_label: Inference
title: Inference
---
@ -14,8 +14,8 @@ title: Inference
Llama Stack Inference API for generating completions, chat completions, and embeddings.
This API provides the raw interface to the underlying models. Two kinds of models are supported:
- LLM models: these models generate "raw" and "chat" (conversational) completions.
- Embedding models: these models generate embeddings to be used for semantic search.
This API provides the raw interface to the underlying models. Two kinds of models are supported:
- LLM models: these models generate "raw" and "chat" (conversational) completions.
- Embedding models: these models generate embeddings to be used for semantic search.
This section contains documentation for all available providers for the **inference** API.

View file

@ -819,7 +819,12 @@ class Agents(Protocol):
tools: list[OpenAIResponseInputTool] | None = None,
include: list[str] | None = None,
max_infer_iters: int | None = 10, # this is an extension to the OpenAI API
shields: Annotated[list[ResponseShield] | None, ExtraBodyField("List of shields to apply during response generation. Shields provide safety and content moderation.")] = None,
shields: Annotated[
list[ResponseShield] | None,
ExtraBodyField(
"List of shields to apply during response generation. Shields provide safety and content moderation."
),
] = None,
) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]:
"""Create a new OpenAI response.

View file

@ -6,15 +6,12 @@
from collections.abc import Callable
from dataclasses import dataclass
from typing import Any, Generic, TypeVar
from typing import Any
from .strong_typing.schema import json_schema_type, register_schema # noqa: F401
T = TypeVar("T")
class ExtraBodyField(Generic[T]):
class ExtraBodyField[T]:
"""
Marker annotation for parameters that arrive via extra_body in the client SDK.
@ -30,7 +27,9 @@ class ExtraBodyField(Generic[T]):
self,
input: str,
model: str,
shields: Annotated[list[str] | None, ExtraBodyField("List of shields to apply")] = None,
shields: Annotated[
list[str] | None, ExtraBodyField("List of shields to apply")
] = None,
) -> ResponseObject:
# shields is available here with proper typing
if shields:
@ -40,12 +39,11 @@ class ExtraBodyField(Generic[T]):
Client usage:
```python
client.responses.create(
input="hello",
model="llama-3",
extra_body={"shields": ["shield-1"]}
input="hello", model="llama-3", extra_body={"shields": ["shield-1"]}
)
```
"""
def __init__(self, description: str | None = None):
self.description = description

View file

@ -25,9 +25,7 @@ def test_shields_via_extra_body(compat_client, text_model_id):
model=text_model_id,
input="What is the capital of France?",
stream=False,
extra_body={
"shields": ["test-shield-1", "test-shield-2"]
}
extra_body={"shields": ["test-shield-1", "test-shield-2"]},
)
# Verify the error message indicates shields are not implemented
@ -35,8 +33,6 @@ def test_shields_via_extra_body(compat_client, text_model_id):
assert "not yet implemented" in error_message.lower() or "not implemented" in error_message.lower()
def test_response_without_shields_still_works(compat_client, text_model_id):
"""Test that responses still work without shields parameter (backwards compatibility)."""
@ -70,9 +66,7 @@ def test_shields_parameter_received_end_to_end(compat_client, text_model_id):
model=text_model_id,
input="Test message for shields verification",
stream=False,
extra_body={
"shields": ["shield-1", "shield-2"]
}
extra_body={"shields": ["shield-1", "shield-2"]},
)
# The NotImplementedError proves that: