# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from collections.abc import AsyncIterator
from typing import Annotated, Any, Literal, Protocol, runtime_checkable

from pydantic import BaseModel, field_validator

from llama_stack_api.common.responses import Order
from llama_stack_api.schema_utils import ExtraBodyField, json_schema_type, webmethod
from llama_stack_api.version import LLAMA_STACK_API_V1

from .openai_responses import (
    ListOpenAIResponseInputItem,
    ListOpenAIResponseObject,
    OpenAIDeleteResponseObject,
    OpenAIResponseInput,
    OpenAIResponseInputTool,
    OpenAIResponseObject,
    OpenAIResponseObjectStream,
    OpenAIResponsePrompt,
    OpenAIResponseText,
)

@json_schema_type
class ResponseGuardrailSpec(BaseModel):
    """Specification for a guardrail to apply during response generation.

    Production-focused configuration allowing safety, moderation, and policy controls.

    Fields
    ------
    type: Identifier for the guardrail implementation (e.g. 'llama-guard', 'content-filter').
    description: Human-readable explanation of the guardrail's purpose.
    enabled: Whether enforcement is active (default True).
    severity: Severity classification for violations (info | warn | block).
    action: Action taken when a violation occurs (flag | block | redact | annotate). If omitted, the provider default applies.
    policy_id: Optional external policy/reference identifier that maps violations to organizational policy.
    version: Optional version of this guardrail configuration (for audit/rollback).
    categories: List of safety/moderation categories this guardrail targets (e.g. ['violence', 'self-harm']).
    thresholds: Per-category numeric thresholds (e.g. {'violence': 0.8}); semantics depend on the provider.
    max_violations: If set, caps the number of violations before early termination.
    config: Provider/model-specific free-form settings (nesting allowed).
    tags: Arbitrary labels to assist analytics/telemetry and routing.
    metadata: Arbitrary supplemental structured metadata for downstream logging.
    """

    type: str
    description: str | None = None
    enabled: bool = True
    severity: Literal["info", "warn", "block"] | None = None
    action: Literal["flag", "block", "redact", "annotate"] | None = None
    policy_id: str | None = None
    version: str | None = None
    categories: list[str] | None = None
    thresholds: dict[str, float] | None = None
    max_violations: int | None = None
    config: dict[str, Any] | None = None
    tags: list[str] | None = None
    metadata: dict[str, Any] | None = None

    model_config = {
        "extra": "forbid",
        "title": "ResponseGuardrailSpec",
    }

    @classmethod
    def _non_empty(cls, value: str, field_name: str) -> str:  # internal helper
        if not value or not value.strip():
            raise ValueError(f"{field_name} cannot be empty")
        return value

    @field_validator("type")
    @classmethod
    def _validate_type(cls, value: str) -> str:
        # Pydantic v2 field validator: reject empty or whitespace-only identifiers.
        return cls._non_empty(value, "type")

    def normalized(self) -> "ResponseGuardrailSpec":
        """Return a normalized copy (e.g., lower-casing categories, stripping whitespace)."""
        if self.categories:
            # Return a copy rather than mutating in place, as the docstring promises.
            return self.model_copy(
                update={"categories": [c.strip().lower() for c in self.categories]}
            )
        return self


ResponseGuardrail = str | ResponseGuardrailSpec
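
# Example (illustrative only, using the definitions above): a guardrail can be
# passed either as a registered ID string or as a full spec; the values here are
# hypothetical.
#
#     spec = ResponseGuardrailSpec(
#         type="llama-guard",
#         severity="block",
#         categories=["Violence", " Self-Harm "],
#         thresholds={"violence": 0.8},
#     )
#     spec = spec.normalized()  # categories become ["violence", "self-harm"]
#     guardrails: list[ResponseGuardrail] = ["my-guardrail-id", spec]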


@runtime_checkable
class Agents(Protocol):
    """Agents

    APIs for creating and interacting with agentic systems."""

    # We situate the OpenAI Responses API in the Agents API just as we did for
    # Inference. The Responses API, in its intent, serves the same purpose as the
    # Agents API above -- it is essentially a lightweight "agentic loop" with
    # integrated tool calling.
    #
    # Both of these APIs are inherently stateful.

@webmethod(route="/responses/{response_id}", method="GET", level=LLAMA_STACK_API_V1)
|
|
async def get_openai_response(
|
|
self,
|
|
response_id: str,
|
|
) -> OpenAIResponseObject:
|
|
"""Get a model response.
|
|
|
|
:param response_id: The ID of the OpenAI response to retrieve.
|
|
:returns: An OpenAIResponseObject.
|
|
"""
|
|
...
|
|
|
|
@webmethod(route="/responses", method="POST", level=LLAMA_STACK_API_V1)
|
|
async def create_openai_response(
|
|
self,
|
|
input: str | list[OpenAIResponseInput],
|
|
model: str,
|
|
prompt: OpenAIResponsePrompt | None = None,
|
|
instructions: str | None = None,
|
|
parallel_tool_calls: bool | None = True,
|
|
previous_response_id: str | None = None,
|
|
conversation: str | None = None,
|
|
store: bool | None = True,
|
|
stream: bool | None = False,
|
|
temperature: float | None = None,
|
|
text: OpenAIResponseText | None = None,
|
|
tools: list[OpenAIResponseInputTool] | None = None,
|
|
include: list[str] | None = None,
|
|
max_infer_iters: int | None = 10, # this is an extension to the OpenAI API
|
|
guardrails: Annotated[
|
|
list[ResponseGuardrail] | None,
|
|
ExtraBodyField(
|
|
"List of guardrails to apply during response generation. Guardrails provide safety and content moderation."
|
|
),
|
|
] = None,
|
|
max_tool_calls: int | None = None,
|
|
) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]:
|
|
"""Create a model response.
|
|
|
|
:param input: Input message(s) to create the response.
|
|
:param model: The underlying LLM used for completions.
|
|
:param prompt: (Optional) Prompt object with ID, version, and variables.
|
|
:param previous_response_id: (Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses.
|
|
:param conversation: (Optional) The ID of a conversation to add the response to. Must begin with 'conv_'. Input and output messages will be automatically added to the conversation.
|
|
:param include: (Optional) Additional fields to include in the response.
|
|
:param guardrails: (Optional) List of guardrails to apply during response generation. Can be guardrail IDs (strings) or guardrail specifications.
|
|
:param max_tool_calls: (Optional) Max number of total calls to built-in tools that can be processed in a response.
|
|
:returns: An OpenAIResponseObject.
|
|
"""
|
|
...
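
    # Example (illustrative): how a caller might invoke this endpoint. `agents_impl`
    # (an Agents implementation) and all argument values are hypothetical.
    #
    #     response = await agents_impl.create_openai_response(
    #         input="Summarize the incident report.",
    #         model="llama-3.3-70b",
    #         guardrails=[
    #             "content-filter",  # registered guardrail ID
    #             ResponseGuardrailSpec(type="llama-guard", action="redact"),
    #         ],
    #     )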
@webmethod(route="/responses", method="GET", level=LLAMA_STACK_API_V1)
|
|
async def list_openai_responses(
|
|
self,
|
|
after: str | None = None,
|
|
limit: int | None = 50,
|
|
model: str | None = None,
|
|
order: Order | None = Order.desc,
|
|
) -> ListOpenAIResponseObject:
|
|
"""List all responses.
|
|
|
|
:param after: The ID of the last response to return.
|
|
:param limit: The number of responses to return.
|
|
:param model: The model to filter responses by.
|
|
:param order: The order to sort responses by when sorted by created_at ('asc' or 'desc').
|
|
:returns: A ListOpenAIResponseObject.
|
|
"""
|
|
...
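
    # Example (illustrative): cursor-based pagination over responses. Assumes the
    # list object exposes OpenAI-style `data` and `has_more` fields; `agents_impl`
    # is a hypothetical Agents implementation.
    #
    #     page = await agents_impl.list_openai_responses(limit=50)
    #     responses = list(page.data)
    #     while page.has_more:
    #         page = await agents_impl.list_openai_responses(after=page.data[-1].id, limit=50)
    #         responses.extend(page.data)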
@webmethod(route="/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1)
|
|
async def list_openai_response_input_items(
|
|
self,
|
|
response_id: str,
|
|
after: str | None = None,
|
|
before: str | None = None,
|
|
include: list[str] | None = None,
|
|
limit: int | None = 20,
|
|
order: Order | None = Order.desc,
|
|
) -> ListOpenAIResponseInputItem:
|
|
"""List input items.
|
|
|
|
:param response_id: The ID of the response to retrieve input items for.
|
|
:param after: An item ID to list items after, used for pagination.
|
|
:param before: An item ID to list items before, used for pagination.
|
|
:param include: Additional fields to include in the response.
|
|
:param limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.
|
|
:param order: The order to return the input items in. Default is desc.
|
|
:returns: An ListOpenAIResponseInputItem.
|
|
"""
|
|
...
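
    # Example (illustrative): inspecting the inputs that produced a response.
    # `agents_impl` and `resp_id` are hypothetical; assumes an OpenAI-style `data` field.
    #
    #     items = await agents_impl.list_openai_response_input_items(resp_id, limit=20)
    #     for item in items.data:
    #         print(item)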
@webmethod(route="/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1)
|
|
async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
|
|
"""Delete a response.
|
|
|
|
:param response_id: The ID of the OpenAI response to delete.
|
|
:returns: An OpenAIDeleteResponseObject
|
|
"""
|
|
...
|