llama-stack-mirror/src/llama_stack_api/agents.py
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from collections.abc import AsyncIterator
from typing import Annotated, Any, Literal, Protocol, runtime_checkable

from pydantic import BaseModel, field_validator

from llama_stack_api.common.responses import Order
from llama_stack_api.schema_utils import ExtraBodyField, json_schema_type, webmethod
from llama_stack_api.version import LLAMA_STACK_API_V1
from .openai_responses import (
    ListOpenAIResponseInputItem,
    ListOpenAIResponseObject,
    OpenAIDeleteResponseObject,
    OpenAIResponseInput,
    OpenAIResponseInputTool,
    OpenAIResponseObject,
    OpenAIResponseObjectStream,
    OpenAIResponsePrompt,
    OpenAIResponseText,
)

@json_schema_type
class ResponseGuardrailSpec(BaseModel):
    """Specification for a guardrail to apply during response generation.

    Production-focused configuration allowing safety, moderation, and policy controls.

    Fields
    ------
    type: Identifier for the guardrail implementation (e.g. 'llama-guard', 'content-filter').
    description: Human-readable explanation / purpose.
    enabled: Whether enforcement is active (default True).
    severity: Severity classification for violations (info | warn | block).
    action: Action taken when a violation occurs (flag | block | redact | annotate). If omitted, the provider default applies.
    policy_id: Optional external policy/reference identifier to map violations to organizational policy.
    version: Optional version of this guardrail configuration (for audit/rollback).
    categories: List of safety/moderation categories this guardrail targets (e.g. ['violence', 'self-harm']).
    thresholds: Per-category numeric thresholds (e.g. {'violence': 0.8}). Semantics depend on the provider.
    max_violations: If set, caps the number of violations before early termination.
    config: Provider/model-specific free-form settings (nesting allowed).
    tags: Arbitrary labels to assist analytics/telemetry and routing.
    metadata: Arbitrary supplemental structured metadata for downstream logging.
    """

    type: str
    description: str | None = None
    enabled: bool = True
    severity: Literal["info", "warn", "block"] | None = None
    action: Literal["flag", "block", "redact", "annotate"] | None = None
    policy_id: str | None = None
    version: str | None = None
    categories: list[str] | None = None
    thresholds: dict[str, float] | None = None
    max_violations: int | None = None
    config: dict[str, Any] | None = None
    tags: list[str] | None = None
    metadata: dict[str, Any] | None = None

    model_config = {
        "extra": "forbid",
        "title": "ResponseGuardrailSpec",
    }

    @classmethod
    def _non_empty(cls, value: str, field_name: str) -> str:  # internal helper
        if not value or not value.strip():
            raise ValueError(f"{field_name} cannot be empty")
        return value

    @field_validator("type")
    @classmethod
    def _validate_type(cls, value: str) -> str:
        # Pydantic v2 field validator: reject empty or whitespace-only guardrail types.
        return cls._non_empty(value, "type")

    def normalized(self) -> "ResponseGuardrailSpec":
        """Return a normalized copy (e.g., lower-cased categories, stripped whitespace)."""
        if self.categories:
            # Use model_copy so the original instance is not mutated, honoring the
            # "copy" contract in the docstring above.
            return self.model_copy(update={"categories": [c.strip().lower() for c in self.categories]})
        return self


ResponseGuardrail = str | ResponseGuardrailSpec
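
# Illustrative sketch, not part of the API surface: a guardrail may be passed
# either as a bare identifier string or as a full spec. The identifiers below
# ('llama-guard', 'content-filter') are assumed example values, not guaranteed
# registered providers.
#
#   moderation: ResponseGuardrail = "llama-guard"
#   strict: ResponseGuardrail = ResponseGuardrailSpec(
#       type="content-filter",
#       severity="block",
#       action="redact",
#       categories=["Violence", " self-harm "],
#       thresholds={"violence": 0.8},
#   ).normalized()  # categories -> ['violence', 'self-harm']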


@runtime_checkable
class Agents(Protocol):
    """Agents

    APIs for creating and interacting with agentic systems."""

    # We situate the OpenAI Responses API in the Agents API just as we did for
    # Inference. The Responses API, in its intent, serves the same purpose as
    # the Agents API above -- it is essentially a lightweight "agentic loop" with
    # integrated tool calling.
    #
    # Both of these APIs are inherently stateful.
@webmethod(route="/responses/{response_id}", method="GET", level=LLAMA_STACK_API_V1)
async def get_openai_response(
self,
response_id: str,
) -> OpenAIResponseObject:
"""Get a model response.
:param response_id: The ID of the OpenAI response to retrieve.
:returns: An OpenAIResponseObject.
"""
...
@webmethod(route="/responses", method="POST", level=LLAMA_STACK_API_V1)
async def create_openai_response(
self,
input: str | list[OpenAIResponseInput],
model: str,
prompt: OpenAIResponsePrompt | None = None,
instructions: str | None = None,
parallel_tool_calls: bool | None = True,
previous_response_id: str | None = None,
conversation: str | None = None,
store: bool | None = True,
stream: bool | None = False,
temperature: float | None = None,
text: OpenAIResponseText | None = None,
tools: list[OpenAIResponseInputTool] | None = None,
include: list[str] | None = None,
max_infer_iters: int | None = 10, # this is an extension to the OpenAI API
guardrails: Annotated[
list[ResponseGuardrail] | None,
ExtraBodyField(
"List of guardrails to apply during response generation. Guardrails provide safety and content moderation."
),
] = None,
max_tool_calls: int | None = None,
) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]:
"""Create a model response.
:param input: Input message(s) to create the response.
:param model: The underlying LLM used for completions.
:param prompt: (Optional) Prompt object with ID, version, and variables.
:param previous_response_id: (Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses.
:param conversation: (Optional) The ID of a conversation to add the response to. Must begin with 'conv_'. Input and output messages will be automatically added to the conversation.
:param include: (Optional) Additional fields to include in the response.
:param guardrails: (Optional) List of guardrails to apply during response generation. Can be guardrail IDs (strings) or guardrail specifications.
:param max_tool_calls: (Optional) Max number of total calls to built-in tools that can be processed in a response.
:returns: An OpenAIResponseObject.
"""
...
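
    # Illustrative usage sketch (assumes `agents` is a concrete implementation of
    # this protocol; the model ID and guardrail identifier are hypothetical):
    #
    #   response = await agents.create_openai_response(
    #       input="Summarize this incident report.",
    #       model="meta-llama/Llama-3.3-70B-Instruct",
    #       guardrails=["llama-guard"],
    #   )
    #
    # With stream=True the same call yields an AsyncIterator of
    # OpenAIResponseObjectStream events instead of a single object.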
@webmethod(route="/responses", method="GET", level=LLAMA_STACK_API_V1)
async def list_openai_responses(
self,
after: str | None = None,
limit: int | None = 50,
model: str | None = None,
order: Order | None = Order.desc,
) -> ListOpenAIResponseObject:
"""List all responses.
:param after: The ID of the last response to return.
:param limit: The number of responses to return.
:param model: The model to filter responses by.
:param order: The order to sort responses by when sorted by created_at ('asc' or 'desc').
:returns: A ListOpenAIResponseObject.
"""
...
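
    # Illustrative cursor-pagination sketch (hypothetical `agents` instance;
    # assumes the returned list object carries OpenAI-style `data` entries):
    #
    #   page = await agents.list_openai_responses(limit=50, order=Order.desc)
    #   if page.data:
    #       next_page = await agents.list_openai_responses(after=page.data[-1].id)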
@webmethod(route="/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1)
async def list_openai_response_input_items(
self,
response_id: str,
after: str | None = None,
before: str | None = None,
include: list[str] | None = None,
limit: int | None = 20,
order: Order | None = Order.desc,
) -> ListOpenAIResponseInputItem:
"""List input items.
:param response_id: The ID of the response to retrieve input items for.
:param after: An item ID to list items after, used for pagination.
:param before: An item ID to list items before, used for pagination.
:param include: Additional fields to include in the response.
:param limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.
:param order: The order to return the input items in. Default is desc.
:returns: An ListOpenAIResponseInputItem.
"""
...
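
    # Illustrative sketch: inspecting the inputs behind a stored response
    # (the `agents` instance and response ID are hypothetical):
    #
    #   items = await agents.list_openai_response_input_items("resp_abc123", limit=20)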
@webmethod(route="/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1)
async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
"""Delete a response.
:param response_id: The ID of the OpenAI response to delete.
:returns: An OpenAIDeleteResponseObject
"""
...
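
# Illustrative lifecycle sketch (hypothetical `agents` instance and response ID):
#
#   stored = await agents.get_openai_response("resp_abc123")
#   result = await agents.delete_openai_response(stored.id)  # OpenAIDeleteResponseObject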