Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-12 20:12:33 +00:00)
chore(package): migrate to src/ layout
Moved package code from llama_stack/ to src/llama_stack/ following Python packaging best practices. Updated pyproject.toml, MANIFEST.in, and tool configurations accordingly. Public API and import paths remain unchanged. Developers will need to reinstall in editable mode after pulling this change. Also updated paths in pre-commit config, scripts, and GitHub workflows.
This commit is contained in:
parent 98a5047f9d
commit 8e5ed739ec
790 changed files with 2947 additions and 447 deletions
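The commit message above states that public import paths are unchanged and that an editable reinstall is needed after pulling. A minimal, illustrative sanity check of that claim (the checkout path in the comment is an assumption, not part of this commit) might look like:

# Illustrative only: verifying the src/ layout after `pip install -e .`.
# Public imports are unchanged; only the resolved file location moves under src/.
import llama_stack
from llama_stack import LlamaStackAsLibraryClient  # re-exported in src/llama_stack/__init__.py

print(llama_stack.__file__)  # expected to point at .../src/llama_stack/__init__.py in a checkout
print(LlamaStackAsLibraryClient.__name__)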
src/llama_stack/__init__.py (new file, 10 lines)
@@ -0,0 +1,10 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from llama_stack.core.library_client import (  # noqa: F401
    AsyncLlamaStackAsLibraryClient,
    LlamaStackAsLibraryClient,
)
src/llama_stack/apis/__init__.py (new file, 5 lines)
@@ -0,0 +1,5 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
src/llama_stack/apis/agents/__init__.py (new file, 7 lines)
@@ -0,0 +1,7 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .agents import *
src/llama_stack/apis/agents/agents.py (new file, 894 lines)
@@ -0,0 +1,894 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from collections.abc import AsyncIterator
|
||||
from datetime import datetime
|
||||
from enum import StrEnum
|
||||
from typing import Annotated, Any, Literal, Protocol, runtime_checkable
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
from llama_stack.apis.common.content_types import URL, ContentDelta, InterleavedContent
|
||||
from llama_stack.apis.common.responses import Order, PaginatedResponse
|
||||
from llama_stack.apis.inference import (
|
||||
CompletionMessage,
|
||||
ResponseFormat,
|
||||
SamplingParams,
|
||||
ToolCall,
|
||||
ToolChoice,
|
||||
ToolConfig,
|
||||
ToolPromptFormat,
|
||||
ToolResponse,
|
||||
ToolResponseMessage,
|
||||
UserMessage,
|
||||
)
|
||||
from llama_stack.apis.safety import SafetyViolation
|
||||
from llama_stack.apis.tools import ToolDef
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
|
||||
from llama_stack.schema_utils import ExtraBodyField, json_schema_type, register_schema, webmethod
|
||||
|
||||
from .openai_responses import (
|
||||
ListOpenAIResponseInputItem,
|
||||
ListOpenAIResponseObject,
|
||||
OpenAIDeleteResponseObject,
|
||||
OpenAIResponseInput,
|
||||
OpenAIResponseInputTool,
|
||||
OpenAIResponseObject,
|
||||
OpenAIResponseObjectStream,
|
||||
OpenAIResponseText,
|
||||
)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ResponseGuardrailSpec(BaseModel):
|
||||
"""Specification for a guardrail to apply during response generation.
|
||||
|
||||
:param type: The type/identifier of the guardrail.
|
||||
"""
|
||||
|
||||
type: str
|
||||
# TODO: more fields to be added for guardrail configuration
|
||||
|
||||
|
||||
ResponseGuardrail = str | ResponseGuardrailSpec
|
||||
|
||||
|
||||
class Attachment(BaseModel):
|
||||
"""An attachment to an agent turn.
|
||||
|
||||
:param content: The content of the attachment.
|
||||
:param mime_type: The MIME type of the attachment.
|
||||
"""
|
||||
|
||||
content: InterleavedContent | URL
|
||||
mime_type: str
|
||||
|
||||
|
||||
class Document(BaseModel):
|
||||
"""A document to be used by an agent.
|
||||
|
||||
:param content: The content of the document.
|
||||
:param mime_type: The MIME type of the document.
|
||||
"""
|
||||
|
||||
content: InterleavedContent | URL
|
||||
mime_type: str
|
||||
|
||||
|
||||
class StepCommon(BaseModel):
|
||||
"""A common step in an agent turn.
|
||||
|
||||
:param turn_id: The ID of the turn.
|
||||
:param step_id: The ID of the step.
|
||||
:param started_at: The time the step started.
|
||||
:param completed_at: The time the step completed.
|
||||
"""
|
||||
|
||||
turn_id: str
|
||||
step_id: str
|
||||
started_at: datetime | None = None
|
||||
completed_at: datetime | None = None
|
||||
|
||||
|
||||
class StepType(StrEnum):
|
||||
"""Type of the step in an agent turn.
|
||||
|
||||
:cvar inference: The step is an inference step that calls an LLM.
|
||||
:cvar tool_execution: The step is a tool execution step that executes a tool call.
|
||||
:cvar shield_call: The step is a shield call step that checks for safety violations.
|
||||
:cvar memory_retrieval: The step is a memory retrieval step that retrieves context from vector DBs.
|
||||
"""
|
||||
|
||||
inference = "inference"
|
||||
tool_execution = "tool_execution"
|
||||
shield_call = "shield_call"
|
||||
memory_retrieval = "memory_retrieval"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class InferenceStep(StepCommon):
|
||||
"""An inference step in an agent turn.
|
||||
|
||||
:param model_response: The response from the LLM.
|
||||
"""
|
||||
|
||||
model_config = ConfigDict(protected_namespaces=())
|
||||
|
||||
step_type: Literal[StepType.inference] = StepType.inference
|
||||
model_response: CompletionMessage
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ToolExecutionStep(StepCommon):
|
||||
"""A tool execution step in an agent turn.
|
||||
|
||||
:param tool_calls: The tool calls to execute.
|
||||
:param tool_responses: The tool responses from the tool calls.
|
||||
"""
|
||||
|
||||
step_type: Literal[StepType.tool_execution] = StepType.tool_execution
|
||||
tool_calls: list[ToolCall]
|
||||
tool_responses: list[ToolResponse]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ShieldCallStep(StepCommon):
|
||||
"""A shield call step in an agent turn.
|
||||
|
||||
:param violation: The violation from the shield call.
|
||||
"""
|
||||
|
||||
step_type: Literal[StepType.shield_call] = StepType.shield_call
|
||||
violation: SafetyViolation | None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class MemoryRetrievalStep(StepCommon):
|
||||
"""A memory retrieval step in an agent turn.
|
||||
|
||||
:param vector_db_ids: The IDs of the vector databases to retrieve context from.
|
||||
:param inserted_context: The context retrieved from the vector databases.
|
||||
"""
|
||||
|
||||
step_type: Literal[StepType.memory_retrieval] = StepType.memory_retrieval
|
||||
# TODO: should this be List[str]?
|
||||
vector_db_ids: str
|
||||
inserted_context: InterleavedContent
|
||||
|
||||
|
||||
Step = Annotated[
|
||||
InferenceStep | ToolExecutionStep | ShieldCallStep | MemoryRetrievalStep,
|
||||
Field(discriminator="step_type"),
|
||||
]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class Turn(BaseModel):
|
||||
"""A single turn in an interaction with an Agentic System.
|
||||
|
||||
:param turn_id: Unique identifier for the turn within a session
|
||||
:param session_id: Unique identifier for the conversation session
|
||||
:param input_messages: List of messages that initiated this turn
|
||||
:param steps: Ordered list of processing steps executed during this turn
|
||||
:param output_message: The model's generated response containing content and metadata
|
||||
:param output_attachments: (Optional) Files or media attached to the agent's response
|
||||
:param started_at: Timestamp when the turn began
|
||||
:param completed_at: (Optional) Timestamp when the turn finished, if completed
|
||||
"""
|
||||
|
||||
turn_id: str
|
||||
session_id: str
|
||||
input_messages: list[UserMessage | ToolResponseMessage]
|
||||
steps: list[Step]
|
||||
output_message: CompletionMessage
|
||||
output_attachments: list[Attachment] | None = Field(default_factory=lambda: [])
|
||||
|
||||
started_at: datetime
|
||||
completed_at: datetime | None = None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class Session(BaseModel):
|
||||
"""A single session of an interaction with an Agentic System.
|
||||
|
||||
:param session_id: Unique identifier for the conversation session
|
||||
:param session_name: Human-readable name for the session
|
||||
:param turns: List of all turns that have occurred in this session
|
||||
:param started_at: Timestamp when the session was created
|
||||
"""
|
||||
|
||||
session_id: str
|
||||
session_name: str
|
||||
turns: list[Turn]
|
||||
started_at: datetime
|
||||
|
||||
|
||||
class AgentToolGroupWithArgs(BaseModel):
|
||||
name: str
|
||||
args: dict[str, Any]
|
||||
|
||||
|
||||
AgentToolGroup = str | AgentToolGroupWithArgs
|
||||
register_schema(AgentToolGroup, name="AgentTool")
|
||||
|
||||
|
||||
class AgentConfigCommon(BaseModel):
|
||||
sampling_params: SamplingParams | None = Field(default_factory=SamplingParams)
|
||||
|
||||
input_shields: list[str] | None = Field(default_factory=lambda: [])
|
||||
output_shields: list[str] | None = Field(default_factory=lambda: [])
|
||||
toolgroups: list[AgentToolGroup] | None = Field(default_factory=lambda: [])
|
||||
client_tools: list[ToolDef] | None = Field(default_factory=lambda: [])
|
||||
tool_choice: ToolChoice | None = Field(default=None, deprecated="use tool_config instead")
|
||||
tool_prompt_format: ToolPromptFormat | None = Field(default=None, deprecated="use tool_config instead")
|
||||
tool_config: ToolConfig | None = Field(default=None)
|
||||
|
||||
max_infer_iters: int | None = 10
|
||||
|
||||
def model_post_init(self, __context):
|
||||
if self.tool_config:
|
||||
if self.tool_choice and self.tool_config.tool_choice != self.tool_choice:
|
||||
raise ValueError("tool_choice is deprecated. Use tool_choice in tool_config instead.")
|
||||
if self.tool_prompt_format and self.tool_config.tool_prompt_format != self.tool_prompt_format:
|
||||
raise ValueError("tool_prompt_format is deprecated. Use tool_prompt_format in tool_config instead.")
|
||||
else:
|
||||
params = {}
|
||||
if self.tool_choice:
|
||||
params["tool_choice"] = self.tool_choice
|
||||
if self.tool_prompt_format:
|
||||
params["tool_prompt_format"] = self.tool_prompt_format
|
||||
self.tool_config = ToolConfig(**params)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class AgentConfig(AgentConfigCommon):
|
||||
"""Configuration for an agent.
|
||||
|
||||
:param model: The model identifier to use for the agent
|
||||
:param instructions: The system instructions for the agent
|
||||
:param name: Optional name for the agent, used in telemetry and identification
|
||||
:param enable_session_persistence: Optional flag indicating whether session data has to be persisted
|
||||
:param response_format: Optional response format configuration
|
||||
"""
|
||||
|
||||
model: str
|
||||
instructions: str
|
||||
name: str | None = None
|
||||
enable_session_persistence: bool | None = False
|
||||
response_format: ResponseFormat | None = None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class Agent(BaseModel):
|
||||
"""An agent instance with configuration and metadata.
|
||||
|
||||
:param agent_id: Unique identifier for the agent
|
||||
:param agent_config: Configuration settings for the agent
|
||||
:param created_at: Timestamp when the agent was created
|
||||
"""
|
||||
|
||||
agent_id: str
|
||||
agent_config: AgentConfig
|
||||
created_at: datetime
|
||||
|
||||
|
||||
class AgentConfigOverridablePerTurn(AgentConfigCommon):
|
||||
instructions: str | None = None
|
||||
|
||||
|
||||
class AgentTurnResponseEventType(StrEnum):
|
||||
step_start = "step_start"
|
||||
step_complete = "step_complete"
|
||||
step_progress = "step_progress"
|
||||
|
||||
turn_start = "turn_start"
|
||||
turn_complete = "turn_complete"
|
||||
turn_awaiting_input = "turn_awaiting_input"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class AgentTurnResponseStepStartPayload(BaseModel):
|
||||
"""Payload for step start events in agent turn responses.
|
||||
|
||||
:param event_type: Type of event being reported
|
||||
:param step_type: Type of step being executed
|
||||
:param step_id: Unique identifier for the step within a turn
|
||||
:param metadata: (Optional) Additional metadata for the step
|
||||
"""
|
||||
|
||||
event_type: Literal[AgentTurnResponseEventType.step_start] = AgentTurnResponseEventType.step_start
|
||||
step_type: StepType
|
||||
step_id: str
|
||||
metadata: dict[str, Any] | None = Field(default_factory=lambda: {})
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class AgentTurnResponseStepCompletePayload(BaseModel):
|
||||
"""Payload for step completion events in agent turn responses.
|
||||
|
||||
:param event_type: Type of event being reported
|
||||
:param step_type: Type of step being executed
|
||||
:param step_id: Unique identifier for the step within a turn
|
||||
:param step_details: Complete details of the executed step
|
||||
"""
|
||||
|
||||
event_type: Literal[AgentTurnResponseEventType.step_complete] = AgentTurnResponseEventType.step_complete
|
||||
step_type: StepType
|
||||
step_id: str
|
||||
step_details: Step
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class AgentTurnResponseStepProgressPayload(BaseModel):
|
||||
"""Payload for step progress events in agent turn responses.
|
||||
|
||||
:param event_type: Type of event being reported
|
||||
:param step_type: Type of step being executed
|
||||
:param step_id: Unique identifier for the step within a turn
|
||||
:param delta: Incremental content changes during step execution
|
||||
"""
|
||||
|
||||
model_config = ConfigDict(protected_namespaces=())
|
||||
|
||||
event_type: Literal[AgentTurnResponseEventType.step_progress] = AgentTurnResponseEventType.step_progress
|
||||
step_type: StepType
|
||||
step_id: str
|
||||
|
||||
delta: ContentDelta
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class AgentTurnResponseTurnStartPayload(BaseModel):
|
||||
"""Payload for turn start events in agent turn responses.
|
||||
|
||||
:param event_type: Type of event being reported
|
||||
:param turn_id: Unique identifier for the turn within a session
|
||||
"""
|
||||
|
||||
event_type: Literal[AgentTurnResponseEventType.turn_start] = AgentTurnResponseEventType.turn_start
|
||||
turn_id: str
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class AgentTurnResponseTurnCompletePayload(BaseModel):
|
||||
"""Payload for turn completion events in agent turn responses.
|
||||
|
||||
:param event_type: Type of event being reported
|
||||
:param turn: Complete turn data including all steps and results
|
||||
"""
|
||||
|
||||
event_type: Literal[AgentTurnResponseEventType.turn_complete] = AgentTurnResponseEventType.turn_complete
|
||||
turn: Turn
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class AgentTurnResponseTurnAwaitingInputPayload(BaseModel):
|
||||
"""Payload for turn awaiting input events in agent turn responses.
|
||||
|
||||
:param event_type: Type of event being reported
|
||||
:param turn: Turn data when waiting for external tool responses
|
||||
"""
|
||||
|
||||
event_type: Literal[AgentTurnResponseEventType.turn_awaiting_input] = AgentTurnResponseEventType.turn_awaiting_input
|
||||
turn: Turn
|
||||
|
||||
|
||||
AgentTurnResponseEventPayload = Annotated[
|
||||
AgentTurnResponseStepStartPayload
|
||||
| AgentTurnResponseStepProgressPayload
|
||||
| AgentTurnResponseStepCompletePayload
|
||||
| AgentTurnResponseTurnStartPayload
|
||||
| AgentTurnResponseTurnCompletePayload
|
||||
| AgentTurnResponseTurnAwaitingInputPayload,
|
||||
Field(discriminator="event_type"),
|
||||
]
|
||||
register_schema(AgentTurnResponseEventPayload, name="AgentTurnResponseEventPayload")
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class AgentTurnResponseEvent(BaseModel):
|
||||
"""An event in an agent turn response stream.
|
||||
|
||||
:param payload: Event-specific payload containing event data
|
||||
"""
|
||||
|
||||
payload: AgentTurnResponseEventPayload
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class AgentCreateResponse(BaseModel):
|
||||
"""Response returned when creating a new agent.
|
||||
|
||||
:param agent_id: Unique identifier for the created agent
|
||||
"""
|
||||
|
||||
agent_id: str
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class AgentSessionCreateResponse(BaseModel):
|
||||
"""Response returned when creating a new agent session.
|
||||
|
||||
:param session_id: Unique identifier for the created session
|
||||
"""
|
||||
|
||||
session_id: str
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class AgentTurnCreateRequest(AgentConfigOverridablePerTurn):
|
||||
"""Request to create a new turn for an agent.
|
||||
|
||||
:param agent_id: Unique identifier for the agent
|
||||
:param session_id: Unique identifier for the conversation session
|
||||
:param messages: List of messages to start the turn with
|
||||
:param documents: (Optional) List of documents to provide to the agent
|
||||
:param toolgroups: (Optional) List of tool groups to make available for this turn
|
||||
:param stream: (Optional) Whether to stream the response
|
||||
:param tool_config: (Optional) Tool configuration to override agent defaults
|
||||
"""
|
||||
|
||||
agent_id: str
|
||||
session_id: str
|
||||
|
||||
# TODO: figure out how we can simplify this and make clear why
|
||||
# ToolResponseMessage needs to be here (it is function call
|
||||
# execution from outside the system)
|
||||
messages: list[UserMessage | ToolResponseMessage]
|
||||
|
||||
documents: list[Document] | None = None
|
||||
toolgroups: list[AgentToolGroup] | None = Field(default_factory=lambda: [])
|
||||
|
||||
stream: bool | None = False
|
||||
tool_config: ToolConfig | None = None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class AgentTurnResumeRequest(BaseModel):
|
||||
"""Request to resume an agent turn with tool responses.
|
||||
|
||||
:param agent_id: Unique identifier for the agent
|
||||
:param session_id: Unique identifier for the conversation session
|
||||
:param turn_id: Unique identifier for the turn within a session
|
||||
:param tool_responses: List of tool responses to submit to continue the turn
|
||||
:param stream: (Optional) Whether to stream the response
|
||||
"""
|
||||
|
||||
agent_id: str
|
||||
session_id: str
|
||||
turn_id: str
|
||||
tool_responses: list[ToolResponse]
|
||||
stream: bool | None = False
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class AgentTurnResponseStreamChunk(BaseModel):
|
||||
"""Streamed agent turn completion response.
|
||||
|
||||
:param event: Individual event in the agent turn response stream
|
||||
"""
|
||||
|
||||
event: AgentTurnResponseEvent
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class AgentStepResponse(BaseModel):
|
||||
"""Response containing details of a specific agent step.
|
||||
|
||||
:param step: The complete step data and execution details
|
||||
"""
|
||||
|
||||
step: Step
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
class Agents(Protocol):
|
||||
"""Agents
|
||||
|
||||
APIs for creating and interacting with agentic systems."""
|
||||
|
||||
@webmethod(
|
||||
route="/agents",
|
||||
method="POST",
|
||||
descriptive_name="create_agent",
|
||||
deprecated=True,
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
@webmethod(
|
||||
route="/agents",
|
||||
method="POST",
|
||||
descriptive_name="create_agent",
|
||||
level=LLAMA_STACK_API_V1ALPHA,
|
||||
)
|
||||
async def create_agent(
|
||||
self,
|
||||
agent_config: AgentConfig,
|
||||
) -> AgentCreateResponse:
|
||||
"""Create an agent with the given configuration.
|
||||
|
||||
:param agent_config: The configuration for the agent.
|
||||
:returns: An AgentCreateResponse with the agent ID.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/agents/{agent_id}/session/{session_id}/turn",
|
||||
method="POST",
|
||||
descriptive_name="create_agent_turn",
|
||||
deprecated=True,
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
@webmethod(
|
||||
route="/agents/{agent_id}/session/{session_id}/turn",
|
||||
method="POST",
|
||||
descriptive_name="create_agent_turn",
|
||||
level=LLAMA_STACK_API_V1ALPHA,
|
||||
)
|
||||
async def create_agent_turn(
|
||||
self,
|
||||
agent_id: str,
|
||||
session_id: str,
|
||||
messages: list[UserMessage | ToolResponseMessage],
|
||||
stream: bool | None = False,
|
||||
documents: list[Document] | None = None,
|
||||
toolgroups: list[AgentToolGroup] | None = None,
|
||||
tool_config: ToolConfig | None = None,
|
||||
) -> Turn | AsyncIterator[AgentTurnResponseStreamChunk]:
|
||||
"""Create a new turn for an agent.
|
||||
|
||||
:param agent_id: The ID of the agent to create the turn for.
|
||||
:param session_id: The ID of the session to create the turn for.
|
||||
:param messages: List of messages to start the turn with.
|
||||
:param stream: (Optional) If True, generate an SSE event stream of the response. Defaults to False.
|
||||
:param documents: (Optional) List of documents to create the turn with.
|
||||
:param toolgroups: (Optional) List of toolgroups to create the turn with; these will be used in addition to the agent's config toolgroups for the request.
|
||||
:param tool_config: (Optional) The tool configuration to create the turn with; it will be used to override the agent's tool_config.
|
||||
:returns: If stream=False, returns a Turn object.
|
||||
If stream=True, returns an SSE event stream of AgentTurnResponseStreamChunk.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume",
|
||||
method="POST",
|
||||
descriptive_name="resume_agent_turn",
|
||||
deprecated=True,
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
@webmethod(
|
||||
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume",
|
||||
method="POST",
|
||||
descriptive_name="resume_agent_turn",
|
||||
level=LLAMA_STACK_API_V1ALPHA,
|
||||
)
|
||||
async def resume_agent_turn(
|
||||
self,
|
||||
agent_id: str,
|
||||
session_id: str,
|
||||
turn_id: str,
|
||||
tool_responses: list[ToolResponse],
|
||||
stream: bool | None = False,
|
||||
) -> Turn | AsyncIterator[AgentTurnResponseStreamChunk]:
|
||||
"""Resume an agent turn with executed tool call responses.
|
||||
|
||||
When a Turn has the status `awaiting_input` due to pending input from client side tool calls, this endpoint can be used to submit the outputs from the tool calls once they are ready.
|
||||
|
||||
:param agent_id: The ID of the agent to resume.
|
||||
:param session_id: The ID of the session to resume.
|
||||
:param turn_id: The ID of the turn to resume.
|
||||
:param tool_responses: The tool call responses to resume the turn with.
|
||||
:param stream: Whether to stream the response.
|
||||
:returns: A Turn object if stream is False, otherwise an AsyncIterator of AgentTurnResponseStreamChunk objects.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}",
|
||||
method="GET",
|
||||
deprecated=True,
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
@webmethod(
|
||||
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}",
|
||||
method="GET",
|
||||
level=LLAMA_STACK_API_V1ALPHA,
|
||||
)
|
||||
async def get_agents_turn(
|
||||
self,
|
||||
agent_id: str,
|
||||
session_id: str,
|
||||
turn_id: str,
|
||||
) -> Turn:
|
||||
"""Retrieve an agent turn by its ID.
|
||||
|
||||
:param agent_id: The ID of the agent to get the turn for.
|
||||
:param session_id: The ID of the session to get the turn for.
|
||||
:param turn_id: The ID of the turn to get.
|
||||
:returns: A Turn.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}",
|
||||
method="GET",
|
||||
deprecated=True,
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
@webmethod(
|
||||
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}",
|
||||
method="GET",
|
||||
level=LLAMA_STACK_API_V1ALPHA,
|
||||
)
|
||||
async def get_agents_step(
|
||||
self,
|
||||
agent_id: str,
|
||||
session_id: str,
|
||||
turn_id: str,
|
||||
step_id: str,
|
||||
) -> AgentStepResponse:
|
||||
"""Retrieve an agent step by its ID.
|
||||
|
||||
:param agent_id: The ID of the agent to get the step for.
|
||||
:param session_id: The ID of the session to get the step for.
|
||||
:param turn_id: The ID of the turn to get the step for.
|
||||
:param step_id: The ID of the step to get.
|
||||
:returns: An AgentStepResponse.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/agents/{agent_id}/session",
|
||||
method="POST",
|
||||
descriptive_name="create_agent_session",
|
||||
deprecated=True,
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
@webmethod(
|
||||
route="/agents/{agent_id}/session",
|
||||
method="POST",
|
||||
descriptive_name="create_agent_session",
|
||||
level=LLAMA_STACK_API_V1ALPHA,
|
||||
)
|
||||
async def create_agent_session(
|
||||
self,
|
||||
agent_id: str,
|
||||
session_name: str,
|
||||
) -> AgentSessionCreateResponse:
|
||||
"""Create a new session for an agent.
|
||||
|
||||
:param agent_id: The ID of the agent to create the session for.
|
||||
:param session_name: The name of the session to create.
|
||||
:returns: An AgentSessionCreateResponse.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/agents/{agent_id}/session/{session_id}",
|
||||
method="GET",
|
||||
deprecated=True,
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
@webmethod(
|
||||
route="/agents/{agent_id}/session/{session_id}",
|
||||
method="GET",
|
||||
level=LLAMA_STACK_API_V1ALPHA,
|
||||
)
|
||||
async def get_agents_session(
|
||||
self,
|
||||
session_id: str,
|
||||
agent_id: str,
|
||||
turn_ids: list[str] | None = None,
|
||||
) -> Session:
|
||||
"""Retrieve an agent session by its ID.
|
||||
|
||||
:param session_id: The ID of the session to get.
|
||||
:param agent_id: The ID of the agent to get the session for.
|
||||
:param turn_ids: (Optional) List of turn IDs to filter the session by.
|
||||
:returns: A Session.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/agents/{agent_id}/session/{session_id}",
|
||||
method="DELETE",
|
||||
deprecated=True,
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
@webmethod(
|
||||
route="/agents/{agent_id}/session/{session_id}",
|
||||
method="DELETE",
|
||||
level=LLAMA_STACK_API_V1ALPHA,
|
||||
)
|
||||
async def delete_agents_session(
|
||||
self,
|
||||
session_id: str,
|
||||
agent_id: str,
|
||||
) -> None:
|
||||
"""Delete an agent session by its ID and its associated turns.
|
||||
|
||||
:param session_id: The ID of the session to delete.
|
||||
:param agent_id: The ID of the agent to delete the session for.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/agents/{agent_id}",
|
||||
method="DELETE",
|
||||
deprecated=True,
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
@webmethod(route="/agents/{agent_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def delete_agent(
|
||||
self,
|
||||
agent_id: str,
|
||||
) -> None:
|
||||
"""Delete an agent by its ID and its associated sessions and turns.
|
||||
|
||||
:param agent_id: The ID of the agent to delete.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/agents", method="GET", deprecated=True, level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/agents", method="GET", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def list_agents(self, start_index: int | None = None, limit: int | None = None) -> PaginatedResponse:
|
||||
"""List all agents.
|
||||
|
||||
:param start_index: The index to start the pagination from.
|
||||
:param limit: The number of agents to return.
|
||||
:returns: A PaginatedResponse.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/agents/{agent_id}",
|
||||
method="GET",
|
||||
deprecated=True,
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
@webmethod(route="/agents/{agent_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def get_agent(self, agent_id: str) -> Agent:
|
||||
"""Describe an agent by its ID.
|
||||
|
||||
:param agent_id: ID of the agent.
|
||||
:returns: The Agent for the given ID.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/agents/{agent_id}/sessions",
|
||||
method="GET",
|
||||
deprecated=True,
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
@webmethod(route="/agents/{agent_id}/sessions", method="GET", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def list_agent_sessions(
|
||||
self,
|
||||
agent_id: str,
|
||||
start_index: int | None = None,
|
||||
limit: int | None = None,
|
||||
) -> PaginatedResponse:
|
||||
"""List all session(s) of a given agent.
|
||||
|
||||
:param agent_id: The ID of the agent to list sessions for.
|
||||
:param start_index: The index to start the pagination from.
|
||||
:param limit: The number of sessions to return.
|
||||
:returns: A PaginatedResponse.
|
||||
"""
|
||||
...
|
||||
|
||||
# We situate the OpenAI Responses API in the Agents API just like we did things
|
||||
# for Inference. The Responses API, in its intent, serves the same purpose as
|
||||
# the Agents API above -- it is essentially a lightweight "agentic loop" with
|
||||
# integrated tool calling.
|
||||
#
|
||||
# Both of these APIs are inherently stateful.
|
||||
|
||||
@webmethod(
|
||||
route="/openai/v1/responses/{response_id}",
|
||||
method="GET",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
deprecated=True,
|
||||
)
|
||||
@webmethod(route="/responses/{response_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def get_openai_response(
|
||||
self,
|
||||
response_id: str,
|
||||
) -> OpenAIResponseObject:
|
||||
"""Get a model response.
|
||||
|
||||
:param response_id: The ID of the OpenAI response to retrieve.
|
||||
:returns: An OpenAIResponseObject.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/responses", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/responses", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def create_openai_response(
|
||||
self,
|
||||
input: str | list[OpenAIResponseInput],
|
||||
model: str,
|
||||
instructions: str | None = None,
|
||||
previous_response_id: str | None = None,
|
||||
conversation: str | None = None,
|
||||
store: bool | None = True,
|
||||
stream: bool | None = False,
|
||||
temperature: float | None = None,
|
||||
text: OpenAIResponseText | None = None,
|
||||
tools: list[OpenAIResponseInputTool] | None = None,
|
||||
include: list[str] | None = None,
|
||||
max_infer_iters: int | None = 10, # this is an extension to the OpenAI API
|
||||
guardrails: Annotated[
|
||||
list[ResponseGuardrail] | None,
|
||||
ExtraBodyField(
|
||||
"List of guardrails to apply during response generation. Guardrails provide safety and content moderation."
|
||||
),
|
||||
] = None,
|
||||
) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]:
|
||||
"""Create a model response.
|
||||
|
||||
:param input: Input message(s) to create the response.
|
||||
:param model: The underlying LLM used for completions.
|
||||
:param previous_response_id: (Optional) If specified, the new response will be a continuation of the previous response. This can be used to easily fork off new responses from existing responses.
|
||||
:param conversation: (Optional) The ID of a conversation to add the response to. Must begin with 'conv_'. Input and output messages will be automatically added to the conversation.
|
||||
:param include: (Optional) Additional fields to include in the response.
|
||||
:param guardrails: (Optional) List of guardrails to apply during response generation. Can be guardrail IDs (strings) or guardrail specifications.
|
||||
:returns: An OpenAIResponseObject.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/responses", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/responses", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_openai_responses(
|
||||
self,
|
||||
after: str | None = None,
|
||||
limit: int | None = 50,
|
||||
model: str | None = None,
|
||||
order: Order | None = Order.desc,
|
||||
) -> ListOpenAIResponseObject:
|
||||
"""List all responses.
|
||||
|
||||
:param after: The ID of the last response to return.
|
||||
:param limit: The number of responses to return.
|
||||
:param model: The model to filter responses by.
|
||||
:param order: The order to sort responses by when sorted by created_at ('asc' or 'desc').
|
||||
:returns: A ListOpenAIResponseObject.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/openai/v1/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
|
||||
)
|
||||
@webmethod(route="/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_openai_response_input_items(
|
||||
self,
|
||||
response_id: str,
|
||||
after: str | None = None,
|
||||
before: str | None = None,
|
||||
include: list[str] | None = None,
|
||||
limit: int | None = 20,
|
||||
order: Order | None = Order.desc,
|
||||
) -> ListOpenAIResponseInputItem:
|
||||
"""List input items.
|
||||
|
||||
:param response_id: The ID of the response to retrieve input items for.
|
||||
:param after: An item ID to list items after, used for pagination.
|
||||
:param before: An item ID to list items before, used for pagination.
|
||||
:param include: Additional fields to include in the response.
|
||||
:param limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.
|
||||
:param order: The order to return the input items in. Default is desc.
|
||||
:returns: A ListOpenAIResponseInputItem.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1)
|
||||
async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
|
||||
"""Delete a response.
|
||||
|
||||
:param response_id: The ID of the OpenAI response to delete.
|
||||
:returns: An OpenAIDeleteResponseObject
|
||||
"""
|
||||
...
|
||||
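For orientation, a minimal, hypothetical sketch of driving the Agents protocol defined in agents.py above; `agents_impl` stands in for any object implementing the protocol (for example one obtained through the library client re-exported in src/llama_stack/__init__.py), and the model name, session name, and prompt are illustrative assumptions:

# Hypothetical sketch of a single agent turn via the Agents protocol above.
from llama_stack.apis.agents.agents import AgentConfig, Agents
from llama_stack.apis.inference import UserMessage


async def run_single_turn(agents_impl: Agents) -> None:
    created = await agents_impl.create_agent(
        AgentConfig(model="example-model", instructions="You are a helpful assistant.")
    )
    session = await agents_impl.create_agent_session(created.agent_id, session_name="demo-session")
    turn = await agents_impl.create_agent_turn(
        agent_id=created.agent_id,
        session_id=session.session_id,
        messages=[UserMessage(content="Hello!")],
        stream=False,  # stream=True yields an AsyncIterator[AgentTurnResponseStreamChunk] instead
    )
    print(turn.output_message.content)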
src/llama_stack/apis/agents/openai_responses.py (new file, 1306 lines)
File diff suppressed because it is too large.
src/llama_stack/apis/batches/__init__.py (new file, 9 lines)
@@ -0,0 +1,9 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .batches import Batches, BatchObject, ListBatchesResponse

__all__ = ["Batches", "BatchObject", "ListBatchesResponse"]
src/llama_stack/apis/batches/batches.py (new file, 100 lines)
@@ -0,0 +1,100 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import Literal, Protocol, runtime_checkable
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
|
||||
try:
|
||||
from openai.types import Batch as BatchObject
|
||||
except ImportError as e:
|
||||
raise ImportError("OpenAI package is required for batches API. Please install it with: pip install openai") from e
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ListBatchesResponse(BaseModel):
|
||||
"""Response containing a list of batch objects."""
|
||||
|
||||
object: Literal["list"] = "list"
|
||||
data: list[BatchObject] = Field(..., description="List of batch objects")
|
||||
first_id: str | None = Field(default=None, description="ID of the first batch in the list")
|
||||
last_id: str | None = Field(default=None, description="ID of the last batch in the list")
|
||||
has_more: bool = Field(default=False, description="Whether there are more batches available")
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
class Batches(Protocol):
|
||||
"""
|
||||
The Batches API enables efficient processing of multiple requests in a single operation,
|
||||
particularly useful for processing large datasets, batch evaluation workflows, and
|
||||
cost-effective inference at scale.
|
||||
|
||||
The API is designed to allow use of openai client libraries for seamless integration.
|
||||
|
||||
This API provides the following extensions:
|
||||
- idempotent batch creation
|
||||
|
||||
Note: This API is currently under active development and may undergo changes.
|
||||
"""
|
||||
|
||||
@webmethod(route="/openai/v1/batches", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/batches", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def create_batch(
|
||||
self,
|
||||
input_file_id: str,
|
||||
endpoint: str,
|
||||
completion_window: Literal["24h"],
|
||||
metadata: dict[str, str] | None = None,
|
||||
idempotency_key: str | None = None,
|
||||
) -> BatchObject:
|
||||
"""Create a new batch for processing multiple API requests.
|
||||
|
||||
:param input_file_id: The ID of an uploaded file containing requests for the batch.
|
||||
:param endpoint: The endpoint to be used for all requests in the batch.
|
||||
:param completion_window: The time window within which the batch should be processed.
|
||||
:param metadata: Optional metadata for the batch.
|
||||
:param idempotency_key: Optional idempotency key. When provided, enables idempotent behavior.
|
||||
:returns: The created batch object.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/batches/{batch_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/batches/{batch_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def retrieve_batch(self, batch_id: str) -> BatchObject:
|
||||
"""Retrieve information about a specific batch.
|
||||
|
||||
:param batch_id: The ID of the batch to retrieve.
|
||||
:returns: The batch object.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/batches/{batch_id}/cancel", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/batches/{batch_id}/cancel", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def cancel_batch(self, batch_id: str) -> BatchObject:
|
||||
"""Cancel a batch that is in progress.
|
||||
|
||||
:param batch_id: The ID of the batch to cancel.
|
||||
:returns: The updated batch object.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/batches", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/batches", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_batches(
|
||||
self,
|
||||
after: str | None = None,
|
||||
limit: int = 20,
|
||||
) -> ListBatchesResponse:
|
||||
"""List all batches for the current user.
|
||||
|
||||
:param after: A cursor for pagination; returns batches after this batch ID.
|
||||
:param limit: Number of batches to return (default 20, max 100).
|
||||
:returns: A list of batch objects.
|
||||
"""
|
||||
...
|
||||
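Since the Batches routes above mirror OpenAI's /v1/batches endpoints and the protocol docstring calls out compatibility with OpenAI client libraries, a hypothetical sketch of exercising them with the stock openai client could look like this (the base URL, API key, and file ID are assumptions, not values from this diff):

# Illustrative only: driving the Batches API with the stock openai client.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")  # assumed server address

batch = client.batches.create(
    input_file_id="file-abc123",       # previously uploaded file of batched requests (assumed ID)
    endpoint="/v1/chat/completions",
    completion_window="24h",
)
print(client.batches.retrieve(batch.id).status)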
src/llama_stack/apis/benchmarks/__init__.py (new file, 7 lines)
@@ -0,0 +1,7 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .benchmarks import *
src/llama_stack/apis/benchmarks/benchmarks.py (new file, 108 lines)
@@ -0,0 +1,108 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
from typing import Any, Literal, Protocol, runtime_checkable
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.apis.resource import Resource, ResourceType
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
|
||||
|
||||
class CommonBenchmarkFields(BaseModel):
|
||||
dataset_id: str
|
||||
scoring_functions: list[str]
|
||||
metadata: dict[str, Any] = Field(
|
||||
default_factory=dict,
|
||||
description="Metadata for this evaluation task",
|
||||
)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class Benchmark(CommonBenchmarkFields, Resource):
|
||||
"""A benchmark resource for evaluating model performance.
|
||||
|
||||
:param dataset_id: Identifier of the dataset to use for the benchmark evaluation
|
||||
:param scoring_functions: List of scoring function identifiers to apply during evaluation
|
||||
:param metadata: Metadata for this evaluation task
|
||||
:param type: The resource type, always benchmark
|
||||
"""
|
||||
|
||||
type: Literal[ResourceType.benchmark] = ResourceType.benchmark
|
||||
|
||||
@property
|
||||
def benchmark_id(self) -> str:
|
||||
return self.identifier
|
||||
|
||||
@property
|
||||
def provider_benchmark_id(self) -> str | None:
|
||||
return self.provider_resource_id
|
||||
|
||||
|
||||
class BenchmarkInput(CommonBenchmarkFields, BaseModel):
|
||||
benchmark_id: str
|
||||
provider_id: str | None = None
|
||||
provider_benchmark_id: str | None = None
|
||||
|
||||
|
||||
class ListBenchmarksResponse(BaseModel):
|
||||
data: list[Benchmark]
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
class Benchmarks(Protocol):
|
||||
@webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def list_benchmarks(self) -> ListBenchmarksResponse:
|
||||
"""List all benchmarks.
|
||||
|
||||
:returns: A ListBenchmarksResponse.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def get_benchmark(
|
||||
self,
|
||||
benchmark_id: str,
|
||||
) -> Benchmark:
|
||||
"""Get a benchmark by its ID.
|
||||
|
||||
:param benchmark_id: The ID of the benchmark to get.
|
||||
:returns: A Benchmark.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def register_benchmark(
|
||||
self,
|
||||
benchmark_id: str,
|
||||
dataset_id: str,
|
||||
scoring_functions: list[str],
|
||||
provider_benchmark_id: str | None = None,
|
||||
provider_id: str | None = None,
|
||||
metadata: dict[str, Any] | None = None,
|
||||
) -> None:
|
||||
"""Register a benchmark.
|
||||
|
||||
:param benchmark_id: The ID of the benchmark to register.
|
||||
:param dataset_id: The ID of the dataset to use for the benchmark.
|
||||
:param scoring_functions: The scoring functions to use for the benchmark.
|
||||
:param provider_benchmark_id: The ID of the provider benchmark to use for the benchmark.
|
||||
:param provider_id: The ID of the provider to use for the benchmark.
|
||||
:param metadata: The metadata to use for the benchmark.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def unregister_benchmark(self, benchmark_id: str) -> None:
|
||||
"""Unregister a benchmark.
|
||||
|
||||
:param benchmark_id: The ID of the benchmark to unregister.
|
||||
"""
|
||||
...
|
||||
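For illustration, a hedged sketch of registering and listing a benchmark through the Benchmarks protocol above; `benchmarks_impl` stands in for any implementation, and the IDs and scoring function name are assumptions:

# Illustrative only: register and list a benchmark via the Benchmarks protocol.
from llama_stack.apis.benchmarks.benchmarks import Benchmarks


async def register_demo_benchmark(benchmarks_impl: Benchmarks) -> None:
    await benchmarks_impl.register_benchmark(
        benchmark_id="demo-benchmark",
        dataset_id="demo-dataset",
        scoring_functions=["basic::equality"],  # assumed scoring function identifier
        metadata={"description": "illustrative benchmark registration"},
    )
    listing = await benchmarks_impl.list_benchmarks()
    print([b.benchmark_id for b in listing.data])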
src/llama_stack/apis/common/__init__.py (new file, 5 lines)
@@ -0,0 +1,5 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
src/llama_stack/apis/common/content_types.py (new file, 143 lines)
@@ -0,0 +1,143 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from enum import Enum
|
||||
from typing import Annotated, Literal
|
||||
|
||||
from pydantic import BaseModel, Field, model_validator
|
||||
|
||||
from llama_stack.models.llama.datatypes import ToolCall
|
||||
from llama_stack.schema_utils import json_schema_type, register_schema
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class URL(BaseModel):
|
||||
"""A URL reference to external content.
|
||||
|
||||
:param uri: The URL string pointing to the resource
|
||||
"""
|
||||
|
||||
uri: str
|
||||
|
||||
|
||||
class _URLOrData(BaseModel):
|
||||
"""
|
||||
A URL or a base64 encoded string
|
||||
|
||||
:param url: A URL of the image or data URL in the format of data:image/{type};base64,{data}. Note that URL could have length limits.
|
||||
:param data: base64 encoded image data as string
|
||||
"""
|
||||
|
||||
url: URL | None = None
|
||||
# data is a base64 encoded string, hint with contentEncoding=base64
|
||||
data: str | None = Field(default=None, json_schema_extra={"contentEncoding": "base64"})
|
||||
|
||||
@model_validator(mode="before")
|
||||
@classmethod
|
||||
def validator(cls, values):
|
||||
if isinstance(values, dict):
|
||||
return values
|
||||
return {"url": values}
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ImageContentItem(BaseModel):
|
||||
"""A image content item
|
||||
|
||||
:param type: Discriminator type of the content item. Always "image"
|
||||
:param image: Image as a base64 encoded string or a URL
|
||||
"""
|
||||
|
||||
type: Literal["image"] = "image"
|
||||
image: _URLOrData
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class TextContentItem(BaseModel):
|
||||
"""A text content item
|
||||
|
||||
:param type: Discriminator type of the content item. Always "text"
|
||||
:param text: Text content
|
||||
"""
|
||||
|
||||
type: Literal["text"] = "text"
|
||||
text: str
|
||||
|
||||
|
||||
# other modalities can be added here
|
||||
InterleavedContentItem = Annotated[
|
||||
ImageContentItem | TextContentItem,
|
||||
Field(discriminator="type"),
|
||||
]
|
||||
register_schema(InterleavedContentItem, name="InterleavedContentItem")
|
||||
|
||||
# accept a single "str" as a special case since it is common
|
||||
InterleavedContent = str | InterleavedContentItem | list[InterleavedContentItem]
|
||||
register_schema(InterleavedContent, name="InterleavedContent")
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class TextDelta(BaseModel):
|
||||
"""A text content delta for streaming responses.
|
||||
|
||||
:param type: Discriminator type of the delta. Always "text"
|
||||
:param text: The incremental text content
|
||||
"""
|
||||
|
||||
type: Literal["text"] = "text"
|
||||
text: str
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ImageDelta(BaseModel):
|
||||
"""An image content delta for streaming responses.
|
||||
|
||||
:param type: Discriminator type of the delta. Always "image"
|
||||
:param image: The incremental image data as bytes
|
||||
"""
|
||||
|
||||
type: Literal["image"] = "image"
|
||||
image: bytes
|
||||
|
||||
|
||||
class ToolCallParseStatus(Enum):
|
||||
"""Status of tool call parsing during streaming.
|
||||
:cvar started: Tool call parsing has begun
|
||||
:cvar in_progress: Tool call parsing is ongoing
|
||||
:cvar failed: Tool call parsing failed
|
||||
:cvar succeeded: Tool call parsing completed successfully
|
||||
"""
|
||||
|
||||
started = "started"
|
||||
in_progress = "in_progress"
|
||||
failed = "failed"
|
||||
succeeded = "succeeded"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ToolCallDelta(BaseModel):
|
||||
"""A tool call content delta for streaming responses.
|
||||
|
||||
:param type: Discriminator type of the delta. Always "tool_call"
|
||||
:param tool_call: Either an in-progress tool call string or the final parsed tool call
|
||||
:param parse_status: Current parsing status of the tool call
|
||||
"""
|
||||
|
||||
type: Literal["tool_call"] = "tool_call"
|
||||
|
||||
# you either send an in-progress tool call so the client can stream a long
|
||||
# code generation or you send the final parsed tool call at the end of the
|
||||
# stream
|
||||
tool_call: str | ToolCall
|
||||
parse_status: ToolCallParseStatus
|
||||
|
||||
|
||||
# streaming completions send a stream of ContentDeltas
|
||||
ContentDelta = Annotated[
|
||||
TextDelta | ImageDelta | ToolCallDelta,
|
||||
Field(discriminator="type"),
|
||||
]
|
||||
register_schema(ContentDelta, name="ContentDelta")
|
||||
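A small, illustrative sketch of constructing InterleavedContent values as defined above; a bare str is accepted as a special case, and the image URL is an assumption:

# Illustrative only: InterleavedContent accepts a bare string or a list of typed content items.
from llama_stack.apis.common.content_types import URL, ImageContentItem, TextContentItem

plain_content = "just a string"  # accepted as-is by APIs taking InterleavedContent
mixed_content = [
    TextContentItem(text="What is in this image?"),
    ImageContentItem(image=URL(uri="https://example.com/cat.png")),  # URL value is an assumption
]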
src/llama_stack/apis/common/errors.py (new file, 103 lines)
@@ -0,0 +1,103 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
# Custom Llama Stack Exception classes should follow the following schema
|
||||
# 1. All classes should inherit from an existing Built-In Exception class: https://docs.python.org/3/library/exceptions.html
|
||||
# 2. All classes should have a custom error message with the goal of informing the Llama Stack user specifically
|
||||
# 3. All classes should propagate the inherited __init__ function via 'super().__init__(message)'
|
||||
|
||||
|
||||
class ResourceNotFoundError(ValueError):
|
||||
"""generic exception for a missing Llama Stack resource"""
|
||||
|
||||
def __init__(self, resource_name: str, resource_type: str, client_list: str) -> None:
|
||||
message = (
|
||||
f"{resource_type} '{resource_name}' not found. Use '{client_list}' to list available {resource_type}s."
|
||||
)
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
class UnsupportedModelError(ValueError):
|
||||
"""raised when model is not present in the list of supported models"""
|
||||
|
||||
def __init__(self, model_name: str, supported_models_list: list[str]):
|
||||
message = f"'{model_name}' model is not supported. Supported models are: {', '.join(supported_models_list)}"
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
class ModelNotFoundError(ResourceNotFoundError):
|
||||
"""raised when Llama Stack cannot find a referenced model"""
|
||||
|
||||
def __init__(self, model_name: str) -> None:
|
||||
super().__init__(model_name, "Model", "client.models.list()")
|
||||
|
||||
|
||||
class VectorStoreNotFoundError(ResourceNotFoundError):
|
||||
"""raised when Llama Stack cannot find a referenced vector store"""
|
||||
|
||||
def __init__(self, vector_store_name: str) -> None:
|
||||
super().__init__(vector_store_name, "Vector Store", "client.vector_dbs.list()")
|
||||
|
||||
|
||||
class DatasetNotFoundError(ResourceNotFoundError):
|
||||
"""raised when Llama Stack cannot find a referenced dataset"""
|
||||
|
||||
def __init__(self, dataset_name: str) -> None:
|
||||
super().__init__(dataset_name, "Dataset", "client.datasets.list()")
|
||||
|
||||
|
||||
class ToolGroupNotFoundError(ResourceNotFoundError):
|
||||
"""raised when Llama Stack cannot find a referenced tool group"""
|
||||
|
||||
def __init__(self, toolgroup_name: str) -> None:
|
||||
super().__init__(toolgroup_name, "Tool Group", "client.toolgroups.list()")
|
||||
|
||||
|
||||
class SessionNotFoundError(ValueError):
|
||||
"""raised when Llama Stack cannot find a referenced session or access is denied"""
|
||||
|
||||
def __init__(self, session_name: str) -> None:
|
||||
message = f"Session '{session_name}' not found or access denied."
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
class ModelTypeError(TypeError):
|
||||
"""raised when a model is present but not the correct type"""
|
||||
|
||||
def __init__(self, model_name: str, model_type: str, expected_model_type: str) -> None:
|
||||
message = (
|
||||
f"Model '{model_name}' is of type '{model_type}' rather than the expected type '{expected_model_type}'"
|
||||
)
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
class ConflictError(ValueError):
|
||||
"""raised when an operation cannot be performed due to a conflict with the current state"""
|
||||
|
||||
def __init__(self, message: str) -> None:
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
class TokenValidationError(ValueError):
|
||||
"""raised when token validation fails during authentication"""
|
||||
|
||||
def __init__(self, message: str) -> None:
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
class ConversationNotFoundError(ResourceNotFoundError):
|
||||
"""raised when Llama Stack cannot find a referenced conversation"""
|
||||
|
||||
def __init__(self, conversation_id: str) -> None:
|
||||
super().__init__(conversation_id, "Conversation", "client.conversations.list()")
|
||||
|
||||
|
||||
class InvalidConversationIdError(ValueError):
|
||||
"""raised when a conversation ID has an invalid format"""
|
||||
|
||||
def __init__(self, conversation_id: str) -> None:
|
||||
message = f"Invalid conversation ID '{conversation_id}'. Expected an ID that begins with 'conv_'."
|
||||
super().__init__(message)
|
||||
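Following the three conventions listed at the top of errors.py, a hypothetical new error class might look like the sketch below; the Shield resource type and the client.shields.list() hint are illustrative assumptions, not part of this diff:

# Illustrative only: a new error following the conventions at the top of errors.py.
from llama_stack.apis.common.errors import ResourceNotFoundError


class ShieldNotFoundError(ResourceNotFoundError):
    """raised when Llama Stack cannot find a referenced shield (example only)"""

    def __init__(self, shield_name: str) -> None:
        super().__init__(shield_name, "Shield", "client.shields.list()")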
src/llama_stack/apis/common/job_types.py (new file, 38 lines)
@@ -0,0 +1,38 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
from enum import Enum
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.schema_utils import json_schema_type
|
||||
|
||||
|
||||
class JobStatus(Enum):
|
||||
"""Status of a job execution.
|
||||
:cvar completed: Job has finished successfully
|
||||
:cvar in_progress: Job is currently running
|
||||
:cvar failed: Job has failed during execution
|
||||
:cvar scheduled: Job is scheduled but not yet started
|
||||
:cvar cancelled: Job was cancelled before completion
|
||||
"""
|
||||
|
||||
completed = "completed"
|
||||
in_progress = "in_progress"
|
||||
failed = "failed"
|
||||
scheduled = "scheduled"
|
||||
cancelled = "cancelled"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class Job(BaseModel):
|
||||
"""A job execution instance with status tracking.
|
||||
|
||||
:param job_id: Unique identifier for the job
|
||||
:param status: Current execution status of the job
|
||||
"""
|
||||
|
||||
job_id: str
|
||||
status: JobStatus
|
||||
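A trivial, illustrative construction of the Job model above (the job ID is an assumption):

# Illustrative only: constructing a Job with its JobStatus, as defined above.
from llama_stack.apis.common.job_types import Job, JobStatus

job = Job(job_id="job-123", status=JobStatus.in_progress)
print(job.status.value)  # "in_progress"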
src/llama_stack/apis/common/responses.py (new file, 36 lines)
@@ -0,0 +1,36 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from enum import Enum
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.schema_utils import json_schema_type
|
||||
|
||||
|
||||
class Order(Enum):
|
||||
"""Sort order for paginated responses.
|
||||
:cvar asc: Ascending order
|
||||
:cvar desc: Descending order
|
||||
"""
|
||||
|
||||
asc = "asc"
|
||||
desc = "desc"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class PaginatedResponse(BaseModel):
|
||||
"""A generic paginated response that follows a simple format.
|
||||
|
||||
:param data: The list of items for the current page
|
||||
:param has_more: Whether there are more items available after this set
|
||||
:param url: The URL for accessing this list
|
||||
"""
|
||||
|
||||
data: list[dict[str, Any]]
|
||||
has_more: bool
|
||||
url: str | None = None
|
||||
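For reference, a sketch of how a provider might populate this response type; the data and URL below are placeholders:

from llama_stack.apis.common.responses import Order, PaginatedResponse

page = PaginatedResponse(
    data=[{"id": 1}, {"id": 2}],
    has_more=True,
    url="/v1beta/datasetio/iterrows/my-dataset",  # hypothetical listing URL
)

if page.has_more:
    print(f"more rows available at {page.url} (default sort: {Order.desc.value})")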
47
src/llama_stack/apis/common/training_types.py
Normal file
@ -0,0 +1,47 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.schema_utils import json_schema_type
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class PostTrainingMetric(BaseModel):
|
||||
"""Training metrics captured during post-training jobs.
|
||||
|
||||
:param epoch: Training epoch number
|
||||
:param train_loss: Loss value on the training dataset
|
||||
:param validation_loss: Loss value on the validation dataset
|
||||
:param perplexity: Perplexity metric indicating model confidence
|
||||
"""
|
||||
|
||||
epoch: int
|
||||
train_loss: float
|
||||
validation_loss: float
|
||||
perplexity: float
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class Checkpoint(BaseModel):
|
||||
"""Checkpoint created during training runs.
|
||||
|
||||
:param identifier: Unique identifier for the checkpoint
|
||||
:param created_at: Timestamp when the checkpoint was created
|
||||
:param epoch: Training epoch when the checkpoint was saved
|
||||
:param post_training_job_id: Identifier of the training job that created this checkpoint
|
||||
:param path: File system path where the checkpoint is stored
|
||||
:param training_metrics: (Optional) Training metrics associated with this checkpoint
|
||||
"""
|
||||
|
||||
identifier: str
|
||||
created_at: datetime
|
||||
epoch: int
|
||||
post_training_job_id: str
|
||||
path: str
|
||||
training_metrics: PostTrainingMetric | None = None
|
||||
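A hedged sketch of how a post-training provider could assemble these records; every value below is illustrative:

from datetime import datetime, timezone

from llama_stack.apis.common.training_types import Checkpoint, PostTrainingMetric

metrics = PostTrainingMetric(epoch=3, train_loss=1.82, validation_loss=1.95, perplexity=7.03)
checkpoint = Checkpoint(
    identifier="sft-epoch-3",                 # made-up identifier
    created_at=datetime.now(timezone.utc),
    epoch=3,
    post_training_job_id="ptjob-42",          # made-up job id
    path="/checkpoints/sft/epoch_3",
    training_metrics=metrics,
)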
158
src/llama_stack/apis/common/type_system.py
Normal file
@ -0,0 +1,158 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import Annotated, Literal
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.schema_utils import json_schema_type, register_schema
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class StringType(BaseModel):
|
||||
"""Parameter type for string values.
|
||||
|
||||
:param type: Discriminator type. Always "string"
|
||||
"""
|
||||
|
||||
type: Literal["string"] = "string"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class NumberType(BaseModel):
|
||||
"""Parameter type for numeric values.
|
||||
|
||||
:param type: Discriminator type. Always "number"
|
||||
"""
|
||||
|
||||
type: Literal["number"] = "number"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class BooleanType(BaseModel):
|
||||
"""Parameter type for boolean values.
|
||||
|
||||
:param type: Discriminator type. Always "boolean"
|
||||
"""
|
||||
|
||||
type: Literal["boolean"] = "boolean"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ArrayType(BaseModel):
|
||||
"""Parameter type for array values.
|
||||
|
||||
:param type: Discriminator type. Always "array"
|
||||
"""
|
||||
|
||||
type: Literal["array"] = "array"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ObjectType(BaseModel):
|
||||
"""Parameter type for object values.
|
||||
|
||||
:param type: Discriminator type. Always "object"
|
||||
"""
|
||||
|
||||
type: Literal["object"] = "object"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class JsonType(BaseModel):
|
||||
"""Parameter type for JSON values.
|
||||
|
||||
:param type: Discriminator type. Always "json"
|
||||
"""
|
||||
|
||||
type: Literal["json"] = "json"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class UnionType(BaseModel):
|
||||
"""Parameter type for union values.
|
||||
|
||||
:param type: Discriminator type. Always "union"
|
||||
"""
|
||||
|
||||
type: Literal["union"] = "union"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ChatCompletionInputType(BaseModel):
|
||||
"""Parameter type for chat completion input.
|
||||
|
||||
:param type: Discriminator type. Always "chat_completion_input"
|
||||
"""
|
||||
|
||||
# expects List[Message] for messages
|
||||
type: Literal["chat_completion_input"] = "chat_completion_input"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class CompletionInputType(BaseModel):
|
||||
"""Parameter type for completion input.
|
||||
|
||||
:param type: Discriminator type. Always "completion_input"
|
||||
"""
|
||||
|
||||
# expects InterleavedTextMedia for content
|
||||
type: Literal["completion_input"] = "completion_input"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class AgentTurnInputType(BaseModel):
|
||||
"""Parameter type for agent turn input.
|
||||
|
||||
:param type: Discriminator type. Always "agent_turn_input"
|
||||
"""
|
||||
|
||||
# expects List[Message] for messages (may also include attachments?)
|
||||
type: Literal["agent_turn_input"] = "agent_turn_input"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class DialogType(BaseModel):
|
||||
"""Parameter type for dialog data with semantic output labels.
|
||||
|
||||
:param type: Discriminator type. Always "dialog"
|
||||
"""
|
||||
|
||||
# expects List[Message] for messages
|
||||
# this type semantically contains the output label whereas ChatCompletionInputType does not
|
||||
type: Literal["dialog"] = "dialog"
|
||||
|
||||
|
||||
ParamType = Annotated[
|
||||
StringType
|
||||
| NumberType
|
||||
| BooleanType
|
||||
| ArrayType
|
||||
| ObjectType
|
||||
| JsonType
|
||||
| UnionType
|
||||
| ChatCompletionInputType
|
||||
| CompletionInputType
|
||||
| AgentTurnInputType,
|
||||
Field(discriminator="type"),
|
||||
]
|
||||
register_schema(ParamType, name="ParamType")
|
||||
|
||||
"""
|
||||
# TODO: recursive definition of ParamType in these containers
|
||||
# will cause infinite recursion in OpenAPI generation script
|
||||
# since we are going with ChatCompletionInputType and CompletionInputType
|
||||
# we don't need to worry about ArrayType/ObjectType/UnionType for now
|
||||
ArrayType.model_rebuild()
|
||||
ObjectType.model_rebuild()
|
||||
UnionType.model_rebuild()
|
||||
|
||||
|
||||
class CustomType(BaseModel):
|
||||
pylint: disable=syntax-error
|
||||
type: Literal["custom"] = "custom"
|
||||
validator_class: str
|
||||
"""
|
||||
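Because ParamType is a discriminated union on the "type" field, plain dicts validate into the right concrete model. A minimal sketch, assuming pydantic v2:

from pydantic import TypeAdapter

from llama_stack.apis.common.type_system import ChatCompletionInputType, ParamType, StringType

adapter = TypeAdapter(ParamType)
assert isinstance(adapter.validate_python({"type": "string"}), StringType)
assert isinstance(adapter.validate_python({"type": "chat_completion_input"}), ChatCompletionInputType)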
31
src/llama_stack/apis/conversations/__init__.py
Normal file
@ -0,0 +1,31 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .conversations import (
|
||||
Conversation,
|
||||
ConversationCreateRequest,
|
||||
ConversationDeletedResource,
|
||||
ConversationItem,
|
||||
ConversationItemCreateRequest,
|
||||
ConversationItemDeletedResource,
|
||||
ConversationItemList,
|
||||
Conversations,
|
||||
ConversationUpdateRequest,
|
||||
Metadata,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"Conversation",
|
||||
"ConversationCreateRequest",
|
||||
"ConversationDeletedResource",
|
||||
"ConversationItem",
|
||||
"ConversationItemCreateRequest",
|
||||
"ConversationItemDeletedResource",
|
||||
"ConversationItemList",
|
||||
"Conversations",
|
||||
"ConversationUpdateRequest",
|
||||
"Metadata",
|
||||
]
|
||||
298
src/llama_stack/apis/conversations/conversations.py
Normal file
@ -0,0 +1,298 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from enum import StrEnum
|
||||
from typing import Annotated, Literal, Protocol, runtime_checkable
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.apis.agents.openai_responses import (
|
||||
OpenAIResponseInputFunctionToolCallOutput,
|
||||
OpenAIResponseMCPApprovalRequest,
|
||||
OpenAIResponseMCPApprovalResponse,
|
||||
OpenAIResponseMessage,
|
||||
OpenAIResponseOutputMessageFileSearchToolCall,
|
||||
OpenAIResponseOutputMessageFunctionToolCall,
|
||||
OpenAIResponseOutputMessageMCPCall,
|
||||
OpenAIResponseOutputMessageMCPListTools,
|
||||
OpenAIResponseOutputMessageWebSearchToolCall,
|
||||
)
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.core.telemetry.trace_protocol import trace_protocol
|
||||
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
|
||||
|
||||
Metadata = dict[str, str]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class Conversation(BaseModel):
|
||||
"""OpenAI-compatible conversation object."""
|
||||
|
||||
id: str = Field(..., description="The unique ID of the conversation.")
|
||||
object: Literal["conversation"] = Field(
|
||||
default="conversation", description="The object type, which is always conversation."
|
||||
)
|
||||
created_at: int = Field(
|
||||
..., description="The time at which the conversation was created, measured in seconds since the Unix epoch."
|
||||
)
|
||||
metadata: Metadata | None = Field(
|
||||
default=None,
|
||||
description="Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard.",
|
||||
)
|
||||
items: list[dict] | None = Field(
|
||||
default=None,
|
||||
description="Initial items to include in the conversation context. You may add up to 20 items at a time.",
|
||||
)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ConversationMessage(BaseModel):
|
||||
"""OpenAI-compatible message item for conversations."""
|
||||
|
||||
id: str = Field(..., description="unique identifier for this message")
|
||||
content: list[dict] = Field(..., description="message content")
|
||||
role: str = Field(..., description="message role")
|
||||
status: str = Field(..., description="message status")
|
||||
type: Literal["message"] = "message"
|
||||
object: Literal["message"] = "message"
|
||||
|
||||
|
||||
ConversationItem = Annotated[
|
||||
OpenAIResponseMessage
|
||||
| OpenAIResponseOutputMessageWebSearchToolCall
|
||||
| OpenAIResponseOutputMessageFileSearchToolCall
|
||||
| OpenAIResponseOutputMessageFunctionToolCall
|
||||
| OpenAIResponseInputFunctionToolCallOutput
|
||||
| OpenAIResponseMCPApprovalRequest
|
||||
| OpenAIResponseMCPApprovalResponse
|
||||
| OpenAIResponseOutputMessageMCPCall
|
||||
| OpenAIResponseOutputMessageMCPListTools,
|
||||
Field(discriminator="type"),
|
||||
]
|
||||
register_schema(ConversationItem, name="ConversationItem")
|
||||
|
||||
# Using OpenAI types directly caused issues but some notes for reference:
|
||||
# Note that ConversationItem is a Annotated Union of the types below:
|
||||
# from openai.types.responses import *
|
||||
# from openai.types.responses.response_item import *
|
||||
# from openai.types.conversations import ConversationItem
|
||||
# f = [
|
||||
# ResponseFunctionToolCallItem,
|
||||
# ResponseFunctionToolCallOutputItem,
|
||||
# ResponseFileSearchToolCall,
|
||||
# ResponseFunctionWebSearch,
|
||||
# ImageGenerationCall,
|
||||
# ResponseComputerToolCall,
|
||||
# ResponseComputerToolCallOutputItem,
|
||||
# ResponseReasoningItem,
|
||||
# ResponseCodeInterpreterToolCall,
|
||||
# LocalShellCall,
|
||||
# LocalShellCallOutput,
|
||||
# McpListTools,
|
||||
# McpApprovalRequest,
|
||||
# McpApprovalResponse,
|
||||
# McpCall,
|
||||
# ResponseCustomToolCall,
|
||||
# ResponseCustomToolCallOutput
|
||||
# ]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ConversationCreateRequest(BaseModel):
|
||||
"""Request body for creating a conversation."""
|
||||
|
||||
items: list[ConversationItem] | None = Field(
|
||||
default=[],
|
||||
description="Initial items to include in the conversation context. You may add up to 20 items at a time.",
|
||||
max_length=20,
|
||||
)
|
||||
metadata: Metadata | None = Field(
|
||||
default={},
|
||||
description="Set of 16 key-value pairs that can be attached to an object. Useful for storing additional information",
|
||||
max_length=16,
|
||||
)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ConversationUpdateRequest(BaseModel):
|
||||
"""Request body for updating a conversation."""
|
||||
|
||||
metadata: Metadata = Field(
|
||||
...,
|
||||
description="Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters.",
|
||||
)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ConversationDeletedResource(BaseModel):
|
||||
"""Response for deleted conversation."""
|
||||
|
||||
id: str = Field(..., description="The deleted conversation identifier")
|
||||
object: str = Field(default="conversation.deleted", description="Object type")
|
||||
deleted: bool = Field(default=True, description="Whether the object was deleted")
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ConversationItemCreateRequest(BaseModel):
|
||||
"""Request body for creating conversation items."""
|
||||
|
||||
items: list[ConversationItem] = Field(
|
||||
...,
|
||||
description="Items to include in the conversation context. You may add up to 20 items at a time.",
|
||||
max_length=20,
|
||||
)
|
||||
|
||||
|
||||
class ConversationItemInclude(StrEnum):
|
||||
"""
|
||||
Specify additional output data to include in the model response.
|
||||
"""
|
||||
|
||||
web_search_call_action_sources = "web_search_call.action.sources"
|
||||
code_interpreter_call_outputs = "code_interpreter_call.outputs"
|
||||
computer_call_output_output_image_url = "computer_call_output.output.image_url"
|
||||
file_search_call_results = "file_search_call.results"
|
||||
message_input_image_image_url = "message.input_image.image_url"
|
||||
message_output_text_logprobs = "message.output_text.logprobs"
|
||||
reasoning_encrypted_content = "reasoning.encrypted_content"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ConversationItemList(BaseModel):
|
||||
"""List of conversation items with pagination."""
|
||||
|
||||
object: str = Field(default="list", description="Object type")
|
||||
data: list[ConversationItem] = Field(..., description="List of conversation items")
|
||||
first_id: str | None = Field(default=None, description="The ID of the first item in the list")
|
||||
last_id: str | None = Field(default=None, description="The ID of the last item in the list")
|
||||
has_more: bool = Field(default=False, description="Whether there are more items available")
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ConversationItemDeletedResource(BaseModel):
|
||||
"""Response for deleted conversation item."""
|
||||
|
||||
id: str = Field(..., description="The deleted item identifier")
|
||||
object: str = Field(default="conversation.item.deleted", description="Object type")
|
||||
deleted: bool = Field(default=True, description="Whether the object was deleted")
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
@trace_protocol
|
||||
class Conversations(Protocol):
|
||||
"""Conversations
|
||||
|
||||
Protocol for conversation management operations."""
|
||||
|
||||
@webmethod(route="/conversations", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def create_conversation(
|
||||
self, items: list[ConversationItem] | None = None, metadata: Metadata | None = None
|
||||
) -> Conversation:
|
||||
"""Create a conversation.
|
||||
|
||||
Create a conversation.
|
||||
|
||||
:param items: Initial items to include in the conversation context.
|
||||
:param metadata: Set of key-value pairs that can be attached to an object.
|
||||
:returns: The created conversation object.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/conversations/{conversation_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def get_conversation(self, conversation_id: str) -> Conversation:
|
||||
"""Retrieve a conversation.
|
||||
|
||||
Get a conversation with the given ID.
|
||||
|
||||
:param conversation_id: The conversation identifier.
|
||||
:returns: The conversation object.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/conversations/{conversation_id}", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def update_conversation(self, conversation_id: str, metadata: Metadata) -> Conversation:
|
||||
"""Update a conversation.
|
||||
|
||||
Update a conversation's metadata with the given ID.
|
||||
|
||||
:param conversation_id: The conversation identifier.
|
||||
:param metadata: Set of key-value pairs that can be attached to an object.
|
||||
:returns: The updated conversation object.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/conversations/{conversation_id}", method="DELETE", level=LLAMA_STACK_API_V1)
|
||||
async def openai_delete_conversation(self, conversation_id: str) -> ConversationDeletedResource:
|
||||
"""Delete a conversation.
|
||||
|
||||
Delete a conversation with the given ID.
|
||||
|
||||
:param conversation_id: The conversation identifier.
|
||||
:returns: The deleted conversation resource.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/conversations/{conversation_id}/items", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def add_items(self, conversation_id: str, items: list[ConversationItem]) -> ConversationItemList:
|
||||
"""Create items.
|
||||
|
||||
Create items in the conversation.
|
||||
|
||||
:param conversation_id: The conversation identifier.
|
||||
:param items: Items to include in the conversation context.
|
||||
:returns: List of created items.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/conversations/{conversation_id}/items/{item_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def retrieve(self, conversation_id: str, item_id: str) -> ConversationItem:
|
||||
"""Retrieve an item.
|
||||
|
||||
Retrieve a conversation item.
|
||||
|
||||
:param conversation_id: The conversation identifier.
|
||||
:param item_id: The item identifier.
|
||||
:returns: The conversation item.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/conversations/{conversation_id}/items", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_items(
|
||||
self,
|
||||
conversation_id: str,
|
||||
after: str | None = None,
|
||||
include: list[ConversationItemInclude] | None = None,
|
||||
limit: int | None = None,
|
||||
order: Literal["asc", "desc"] | None = None,
|
||||
) -> ConversationItemList:
|
||||
"""List items.
|
||||
|
||||
List items in the conversation.
|
||||
|
||||
:param conversation_id: The conversation identifier.
|
||||
:param after: An item ID to list items after, used in pagination.
|
||||
:param include: Specify additional output data to include in the response.
|
||||
:param limit: A limit on the number of objects to be returned (1-100, default 20).
|
||||
:param order: The order to return items in (asc or desc, default desc).
|
||||
:returns: List of conversation items.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/conversations/{conversation_id}/items/{item_id}", method="DELETE", level=LLAMA_STACK_API_V1)
|
||||
async def openai_delete_conversation_item(
|
||||
self, conversation_id: str, item_id: str
|
||||
) -> ConversationItemDeletedResource:
|
||||
"""Delete an item.
|
||||
|
||||
Delete a conversation item.
|
||||
|
||||
:param conversation_id: The conversation identifier.
|
||||
:param item_id: The item identifier.
|
||||
:returns: The deleted item resource.
|
||||
"""
|
||||
...
|
||||
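A hedged sketch of exercising the protocol above; conversations_impl stands in for whichever provider implements Conversations at runtime, and items is a list of ConversationItem values built by the caller:

async def demo(conversations_impl, items) -> None:
    conv = await conversations_impl.create_conversation(metadata={"topic": "support"})
    await conversations_impl.add_items(conv.id, items)            # up to 20 items per call
    page = await conversations_impl.list_items(conv.id, limit=10, order="asc")
    for item in page.data:
        print(type(item).__name__)
    await conversations_impl.openai_delete_conversation(conv.id)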
7
src/llama_stack/apis/datasetio/__init__.py
Normal file
@ -0,0 +1,7 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .datasetio import *
|
||||
59
src/llama_stack/apis/datasetio/datasetio.py
Normal file
@ -0,0 +1,59 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import Any, Protocol, runtime_checkable
|
||||
|
||||
from llama_stack.apis.common.responses import PaginatedResponse
|
||||
from llama_stack.apis.datasets import Dataset
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1BETA
|
||||
from llama_stack.schema_utils import webmethod
|
||||
|
||||
|
||||
class DatasetStore(Protocol):
|
||||
def get_dataset(self, dataset_id: str) -> Dataset: ...
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
class DatasetIO(Protocol):
|
||||
# keeping for aligning with inference/safety, but this is not used
|
||||
dataset_store: DatasetStore
|
||||
|
||||
@webmethod(route="/datasetio/iterrows/{dataset_id:path}", method="GET", deprecated=True, level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/datasetio/iterrows/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1BETA)
|
||||
async def iterrows(
|
||||
self,
|
||||
dataset_id: str,
|
||||
start_index: int | None = None,
|
||||
limit: int | None = None,
|
||||
) -> PaginatedResponse:
|
||||
"""Get a paginated list of rows from a dataset.
|
||||
|
||||
Uses offset-based pagination where:
|
||||
- start_index: The starting index (0-based). If None, starts from beginning.
|
||||
- limit: Number of items to return. If None or -1, returns all items.
|
||||
|
||||
The response includes:
|
||||
- data: List of items for the current page.
|
||||
- has_more: Whether there are more items available after this set.
|
||||
|
||||
:param dataset_id: The ID of the dataset to get the rows from.
|
||||
:param start_index: Index into dataset for the first row to get. Get all rows if None.
|
||||
:param limit: The number of rows to get.
|
||||
:returns: A PaginatedResponse.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/datasetio/append-rows/{dataset_id:path}", method="POST", deprecated=True, level=LLAMA_STACK_API_V1
|
||||
)
|
||||
@webmethod(route="/datasetio/append-rows/{dataset_id:path}", method="POST", level=LLAMA_STACK_API_V1BETA)
|
||||
async def append_rows(self, dataset_id: str, rows: list[dict[str, Any]]) -> None:
|
||||
"""Append rows to a dataset.
|
||||
|
||||
:param dataset_id: The ID of the dataset to append the rows to.
|
||||
:param rows: The rows to append to the dataset.
|
||||
"""
|
||||
...
|
||||
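A sketch of walking a dataset with the offset-based pagination described above; datasetio_impl is assumed to implement DatasetIO:

async def read_all_rows(datasetio_impl, dataset_id: str, page_size: int = 100) -> list[dict]:
    rows: list[dict] = []
    start = 0
    while True:
        page = await datasetio_impl.iterrows(dataset_id, start_index=start, limit=page_size)
        rows.extend(page.data)
        if not page.has_more:
            return rows
        start += len(page.data)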
7
src/llama_stack/apis/datasets/__init__.py
Normal file
@ -0,0 +1,7 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .datasets import *
|
||||
251
src/llama_stack/apis/datasets/datasets.py
Normal file
@ -0,0 +1,251 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from enum import Enum, StrEnum
|
||||
from typing import Annotated, Any, Literal, Protocol
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.apis.resource import Resource, ResourceType
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1BETA
|
||||
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
|
||||
|
||||
|
||||
class DatasetPurpose(StrEnum):
|
||||
"""
|
||||
Purpose of the dataset. Each purpose has a required input data schema.
|
||||
|
||||
:cvar post-training/messages: The dataset contains messages used for post-training.
|
||||
{
|
||||
"messages": [
|
||||
{"role": "user", "content": "Hello, world!"},
|
||||
{"role": "assistant", "content": "Hello, world!"},
|
||||
]
|
||||
}
|
||||
:cvar eval/question-answer: The dataset contains a question column and an answer column.
|
||||
{
|
||||
"question": "What is the capital of France?",
|
||||
"answer": "Paris"
|
||||
}
|
||||
:cvar eval/messages-answer: The dataset contains a messages column with list of messages and an answer column.
|
||||
{
|
||||
"messages": [
|
||||
{"role": "user", "content": "Hello, my name is John Doe."},
|
||||
{"role": "assistant", "content": "Hello, John Doe. How can I help you today?"},
|
||||
{"role": "user", "content": "What's my name?"},
|
||||
],
|
||||
"answer": "John Doe"
|
||||
}
|
||||
"""
|
||||
|
||||
post_training_messages = "post-training/messages"
|
||||
eval_question_answer = "eval/question-answer"
|
||||
eval_messages_answer = "eval/messages-answer"
|
||||
|
||||
# TODO: add more schemas here
|
||||
|
||||
|
||||
class DatasetType(Enum):
|
||||
"""
|
||||
Type of the dataset source.
|
||||
:cvar uri: The dataset can be obtained from a URI.
|
||||
:cvar rows: The dataset is stored in rows.
|
||||
"""
|
||||
|
||||
uri = "uri"
|
||||
rows = "rows"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class URIDataSource(BaseModel):
|
||||
"""A dataset that can be obtained from a URI.
|
||||
:param uri: The dataset can be obtained from a URI. E.g.
|
||||
- "https://mywebsite.com/mydata.jsonl"
|
||||
- "lsfs://mydata.jsonl"
|
||||
- "data:csv;base64,{base64_content}"
|
||||
"""
|
||||
|
||||
type: Literal["uri"] = "uri"
|
||||
uri: str
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class RowsDataSource(BaseModel):
|
||||
"""A dataset stored in rows.
|
||||
:param rows: The dataset is stored in rows. E.g.
|
||||
- [
|
||||
{"messages": [{"role": "user", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}]}
|
||||
]
|
||||
"""
|
||||
|
||||
type: Literal["rows"] = "rows"
|
||||
rows: list[dict[str, Any]]
|
||||
|
||||
|
||||
DataSource = Annotated[
|
||||
URIDataSource | RowsDataSource,
|
||||
Field(discriminator="type"),
|
||||
]
|
||||
register_schema(DataSource, name="DataSource")
|
||||
|
||||
|
||||
class CommonDatasetFields(BaseModel):
|
||||
"""
|
||||
Common fields for a dataset.
|
||||
|
||||
:param purpose: Purpose of the dataset indicating its intended use
|
||||
:param source: Data source configuration for the dataset
|
||||
:param metadata: Additional metadata for the dataset
|
||||
"""
|
||||
|
||||
purpose: DatasetPurpose
|
||||
source: DataSource
|
||||
metadata: dict[str, Any] = Field(
|
||||
default_factory=dict,
|
||||
description="Any additional metadata for this dataset",
|
||||
)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class Dataset(CommonDatasetFields, Resource):
|
||||
"""Dataset resource for storing and accessing training or evaluation data.
|
||||
|
||||
:param type: Type of resource, always 'dataset' for datasets
|
||||
"""
|
||||
|
||||
type: Literal[ResourceType.dataset] = ResourceType.dataset
|
||||
|
||||
@property
|
||||
def dataset_id(self) -> str:
|
||||
return self.identifier
|
||||
|
||||
@property
|
||||
def provider_dataset_id(self) -> str | None:
|
||||
return self.provider_resource_id
|
||||
|
||||
|
||||
class DatasetInput(CommonDatasetFields, BaseModel):
|
||||
"""Input parameters for dataset operations.
|
||||
|
||||
:param dataset_id: Unique identifier for the dataset
|
||||
"""
|
||||
|
||||
dataset_id: str
|
||||
|
||||
|
||||
class ListDatasetsResponse(BaseModel):
|
||||
"""Response from listing datasets.
|
||||
|
||||
:param data: List of datasets
|
||||
"""
|
||||
|
||||
data: list[Dataset]
|
||||
|
||||
|
||||
class Datasets(Protocol):
|
||||
@webmethod(route="/datasets", method="POST", deprecated=True, level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/datasets", method="POST", level=LLAMA_STACK_API_V1BETA)
|
||||
async def register_dataset(
|
||||
self,
|
||||
purpose: DatasetPurpose,
|
||||
source: DataSource,
|
||||
metadata: dict[str, Any] | None = None,
|
||||
dataset_id: str | None = None,
|
||||
) -> Dataset:
|
||||
"""
|
||||
Register a new dataset.
|
||||
|
||||
:param purpose: The purpose of the dataset.
|
||||
One of:
|
||||
- "post-training/messages": The dataset contains a messages column with list of messages for post-training.
|
||||
{
|
||||
"messages": [
|
||||
{"role": "user", "content": "Hello, world!"},
|
||||
{"role": "assistant", "content": "Hello, world!"},
|
||||
]
|
||||
}
|
||||
- "eval/question-answer": The dataset contains a question column and an answer column for evaluation.
|
||||
{
|
||||
"question": "What is the capital of France?",
|
||||
"answer": "Paris"
|
||||
}
|
||||
- "eval/messages-answer": The dataset contains a messages column with list of messages and an answer column for evaluation.
|
||||
{
|
||||
"messages": [
|
||||
{"role": "user", "content": "Hello, my name is John Doe."},
|
||||
{"role": "assistant", "content": "Hello, John Doe. How can I help you today?"},
|
||||
{"role": "user", "content": "What's my name?"},
|
||||
],
|
||||
"answer": "John Doe"
|
||||
}
|
||||
:param source: The data source of the dataset. Ensure that the data source schema is compatible with the purpose of the dataset. Examples:
|
||||
- {
|
||||
"type": "uri",
|
||||
"uri": "https://mywebsite.com/mydata.jsonl"
|
||||
}
|
||||
- {
|
||||
"type": "uri",
|
||||
"uri": "lsfs://mydata.jsonl"
|
||||
}
|
||||
- {
|
||||
"type": "uri",
|
||||
"uri": "data:csv;base64,{base64_content}"
|
||||
}
|
||||
- {
|
||||
"type": "uri",
|
||||
"uri": "huggingface://llamastack/simpleqa?split=train"
|
||||
}
|
||||
- {
|
||||
"type": "rows",
|
||||
"rows": [
|
||||
{
|
||||
"messages": [
|
||||
{"role": "user", "content": "Hello, world!"},
|
||||
{"role": "assistant", "content": "Hello, world!"},
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
:param metadata: The metadata for the dataset.
|
||||
- E.g. {"description": "My dataset"}.
|
||||
:param dataset_id: The ID of the dataset. If not provided, an ID will be generated.
|
||||
:returns: A Dataset.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/datasets/{dataset_id:path}", method="GET", deprecated=True, level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/datasets/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1BETA)
|
||||
async def get_dataset(
|
||||
self,
|
||||
dataset_id: str,
|
||||
) -> Dataset:
|
||||
"""Get a dataset by its ID.
|
||||
|
||||
:param dataset_id: The ID of the dataset to get.
|
||||
:returns: A Dataset.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/datasets", method="GET", deprecated=True, level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/datasets", method="GET", level=LLAMA_STACK_API_V1BETA)
|
||||
async def list_datasets(self) -> ListDatasetsResponse:
|
||||
"""List all datasets.
|
||||
|
||||
:returns: A ListDatasetsResponse.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/datasets/{dataset_id:path}", method="DELETE", deprecated=True, level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/datasets/{dataset_id:path}", method="DELETE", level=LLAMA_STACK_API_V1BETA)
|
||||
async def unregister_dataset(
|
||||
self,
|
||||
dataset_id: str,
|
||||
) -> None:
|
||||
"""Unregister a dataset by its ID.
|
||||
|
||||
:param dataset_id: The ID of the dataset to unregister.
|
||||
"""
|
||||
...
|
||||
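A hedged sketch of registering both dataset source types; datasets_impl, the dataset IDs, and the URI are all illustrative:

from llama_stack.apis.datasets import DatasetPurpose, RowsDataSource, URIDataSource

async def register_examples(datasets_impl) -> None:
    await datasets_impl.register_dataset(
        purpose=DatasetPurpose.eval_question_answer,
        source=URIDataSource(uri="https://example.com/qa.jsonl"),
        metadata={"description": "toy QA set"},
        dataset_id="qa-demo",
    )
    await datasets_impl.register_dataset(
        purpose=DatasetPurpose.post_training_messages,
        source=RowsDataSource(
            rows=[
                {
                    "messages": [
                        {"role": "user", "content": "Hello, world!"},
                        {"role": "assistant", "content": "Hello, world!"},
                    ]
                }
            ]
        ),
        dataset_id="chat-demo",
    )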
158
src/llama_stack/apis/datatypes.py
Normal file
@ -0,0 +1,158 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from enum import Enum, EnumMeta
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.schema_utils import json_schema_type
|
||||
|
||||
|
||||
class DynamicApiMeta(EnumMeta):
|
||||
def __new__(cls, name, bases, namespace):
|
||||
# Store the original enum values
|
||||
original_values = {k: v for k, v in namespace.items() if not k.startswith("_")}
|
||||
|
||||
# Create the enum class
|
||||
cls = super().__new__(cls, name, bases, namespace)
|
||||
|
||||
# Store the original values for reference
|
||||
cls._original_values = original_values
|
||||
# Initialize _dynamic_values
|
||||
cls._dynamic_values = {}
|
||||
|
||||
return cls
|
||||
|
||||
def __call__(cls, value):
|
||||
try:
|
||||
return super().__call__(value)
|
||||
except ValueError as e:
|
||||
# If this value was already dynamically added, return it
|
||||
if value in cls._dynamic_values:
|
||||
return cls._dynamic_values[value]
|
||||
|
||||
# If the value doesn't exist, create a new enum member
|
||||
# Create a new member name from the value
|
||||
member_name = value.lower().replace("-", "_")
|
||||
|
||||
# If this member name already exists in the enum, return the existing member
|
||||
if member_name in cls._member_map_:
|
||||
return cls._member_map_[member_name]
|
||||
|
||||
# Instead of creating a new member, raise ValueError to force users to use Api.add() to
|
||||
# register new APIs explicitly
|
||||
raise ValueError(f"API '{value}' does not exist. Use Api.add() to register new APIs.") from e
|
||||
|
||||
def __iter__(cls):
|
||||
# Allow iteration over both static and dynamic members
|
||||
yield from super().__iter__()
|
||||
if hasattr(cls, "_dynamic_values"):
|
||||
yield from cls._dynamic_values.values()
|
||||
|
||||
def add(cls, value):
|
||||
"""
|
||||
Add a new API to the enum.
|
||||
Used to register external APIs.
|
||||
"""
|
||||
member_name = value.lower().replace("-", "_")
|
||||
|
||||
# If this member name already exists in the enum, return it
|
||||
if member_name in cls._member_map_:
|
||||
return cls._member_map_[member_name]
|
||||
|
||||
# Create a new enum member
|
||||
member = object.__new__(cls)
|
||||
member._name_ = member_name
|
||||
member._value_ = value
|
||||
|
||||
# Add it to the enum class
|
||||
cls._member_map_[member_name] = member
|
||||
cls._member_names_.append(member_name)
|
||||
cls._member_type_ = str
|
||||
|
||||
# Store it in our dynamic values
|
||||
cls._dynamic_values[value] = member
|
||||
|
||||
return member
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class Api(Enum, metaclass=DynamicApiMeta):
|
||||
"""Enumeration of all available APIs in the Llama Stack system.
|
||||
:cvar providers: Provider management and configuration
|
||||
:cvar inference: Text generation, chat completions, and embeddings
|
||||
:cvar safety: Content moderation and safety shields
|
||||
:cvar agents: Agent orchestration and execution
|
||||
:cvar batches: Batch processing for asynchronous API requests
|
||||
:cvar vector_io: Vector database operations and queries
|
||||
:cvar datasetio: Dataset input/output operations
|
||||
:cvar scoring: Model output evaluation and scoring
|
||||
:cvar eval: Model evaluation and benchmarking framework
|
||||
:cvar post_training: Fine-tuning and model training
|
||||
:cvar tool_runtime: Tool execution and management
|
||||
:cvar telemetry: Observability and system monitoring
|
||||
:cvar models: Model metadata and management
|
||||
:cvar shields: Safety shield implementations
|
||||
:cvar datasets: Dataset creation and management
|
||||
:cvar scoring_functions: Scoring function definitions
|
||||
:cvar benchmarks: Benchmark suite management
|
||||
:cvar tool_groups: Tool group organization
|
||||
:cvar files: File storage and management
|
||||
:cvar prompts: Prompt versions and management
|
||||
:cvar inspect: Built-in system inspection and introspection
|
||||
"""
|
||||
|
||||
providers = "providers"
|
||||
inference = "inference"
|
||||
safety = "safety"
|
||||
agents = "agents"
|
||||
batches = "batches"
|
||||
vector_io = "vector_io"
|
||||
datasetio = "datasetio"
|
||||
scoring = "scoring"
|
||||
eval = "eval"
|
||||
post_training = "post_training"
|
||||
tool_runtime = "tool_runtime"
|
||||
|
||||
models = "models"
|
||||
shields = "shields"
|
||||
vector_stores = "vector_stores" # only used for routing table
|
||||
datasets = "datasets"
|
||||
scoring_functions = "scoring_functions"
|
||||
benchmarks = "benchmarks"
|
||||
tool_groups = "tool_groups"
|
||||
files = "files"
|
||||
prompts = "prompts"
|
||||
conversations = "conversations"
|
||||
|
||||
# built-in API
|
||||
inspect = "inspect"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class Error(BaseModel):
|
||||
"""
|
||||
Error response from the API. Roughly follows RFC 7807.
|
||||
|
||||
:param status: HTTP status code
|
||||
:param title: Error title, a short summary of the error which is invariant for an error type
|
||||
:param detail: Error detail, a longer human-readable description of the error
|
||||
:param instance: (Optional) A URL which can be used to retrieve more information about the specific occurrence of the error
|
||||
"""
|
||||
|
||||
status: int
|
||||
title: str
|
||||
detail: str
|
||||
instance: str | None = None
|
||||
|
||||
|
||||
class ExternalApiSpec(BaseModel):
|
||||
"""Specification for an external API implementation."""
|
||||
|
||||
module: str = Field(..., description="Python module containing the API implementation")
|
||||
name: str = Field(..., description="Name of the API")
|
||||
pip_packages: list[str] = Field(default=[], description="List of pip packages to install the API")
|
||||
protocol: str = Field(..., description="Name of the protocol class for the API")
|
||||
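A small sketch of how the dynamic enum behaves; "weather" is a made-up external API name:

from llama_stack.apis.datatypes import Api

assert Api("inference") is Api.inference   # built-in APIs resolve normally

weather = Api.add("weather")               # external APIs must be registered explicitly
assert Api("weather") is weather           # afterwards, lookup by value succeeds
# Api("does-not-exist") would raise ValueError until it is registered via Api.add()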
7
src/llama_stack/apis/eval/__init__.py
Normal file
@ -0,0 +1,7 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .eval import *
|
||||
169
src/llama_stack/apis/eval/eval.py
Normal file
@ -0,0 +1,169 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import Annotated, Any, Literal, Protocol
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.apis.agents import AgentConfig
|
||||
from llama_stack.apis.common.job_types import Job
|
||||
from llama_stack.apis.inference import SamplingParams, SystemMessage
|
||||
from llama_stack.apis.scoring import ScoringResult
|
||||
from llama_stack.apis.scoring_functions import ScoringFnParams
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
|
||||
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ModelCandidate(BaseModel):
|
||||
"""A model candidate for evaluation.
|
||||
|
||||
:param model: The model ID to evaluate.
|
||||
:param sampling_params: The sampling parameters for the model.
|
||||
:param system_message: (Optional) The system message providing instructions or context to the model.
|
||||
"""
|
||||
|
||||
type: Literal["model"] = "model"
|
||||
model: str
|
||||
sampling_params: SamplingParams
|
||||
system_message: SystemMessage | None = None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class AgentCandidate(BaseModel):
|
||||
"""An agent candidate for evaluation.
|
||||
|
||||
:param config: The configuration for the agent candidate.
|
||||
"""
|
||||
|
||||
type: Literal["agent"] = "agent"
|
||||
config: AgentConfig
|
||||
|
||||
|
||||
EvalCandidate = Annotated[ModelCandidate | AgentCandidate, Field(discriminator="type")]
|
||||
register_schema(EvalCandidate, name="EvalCandidate")
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class BenchmarkConfig(BaseModel):
|
||||
"""A benchmark configuration for evaluation.
|
||||
|
||||
:param eval_candidate: The candidate to evaluate.
|
||||
:param scoring_params: Map between scoring function id and parameters for each scoring function you want to run
|
||||
:param num_examples: (Optional) The number of examples to evaluate. If not provided, all examples in the dataset will be evaluated
|
||||
"""
|
||||
|
||||
eval_candidate: EvalCandidate
|
||||
scoring_params: dict[str, ScoringFnParams] = Field(
|
||||
description="Map between scoring function id and parameters for each scoring function you want to run",
|
||||
default_factory=dict,
|
||||
)
|
||||
num_examples: int | None = Field(
|
||||
description="Number of examples to evaluate (useful for testing), if not provided, all examples in the dataset will be evaluated",
|
||||
default=None,
|
||||
)
|
||||
# we could optionally add any specific dataset config here
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class EvaluateResponse(BaseModel):
|
||||
"""The response from an evaluation.
|
||||
|
||||
:param generations: The generations from the evaluation.
|
||||
:param scores: The scores from the evaluation.
|
||||
"""
|
||||
|
||||
generations: list[dict[str, Any]]
|
||||
# each key in the dict is a scoring function name
|
||||
scores: dict[str, ScoringResult]
|
||||
|
||||
|
||||
class Eval(Protocol):
|
||||
"""Evaluations
|
||||
|
||||
Llama Stack Evaluation API for running evaluations on model and agent candidates."""
|
||||
|
||||
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def run_eval(
|
||||
self,
|
||||
benchmark_id: str,
|
||||
benchmark_config: BenchmarkConfig,
|
||||
) -> Job:
|
||||
"""Run an evaluation on a benchmark.
|
||||
|
||||
:param benchmark_id: The ID of the benchmark to run the evaluation on.
|
||||
:param benchmark_config: The configuration for the benchmark.
|
||||
:returns: The job that was created to run the evaluation.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST", level=LLAMA_STACK_API_V1, deprecated=True
|
||||
)
|
||||
@webmethod(route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def evaluate_rows(
|
||||
self,
|
||||
benchmark_id: str,
|
||||
input_rows: list[dict[str, Any]],
|
||||
scoring_functions: list[str],
|
||||
benchmark_config: BenchmarkConfig,
|
||||
) -> EvaluateResponse:
|
||||
"""Evaluate a list of rows on a benchmark.
|
||||
|
||||
:param benchmark_id: The ID of the benchmark to run the evaluation on.
|
||||
:param input_rows: The rows to evaluate.
|
||||
:param scoring_functions: The scoring functions to use for the evaluation.
|
||||
:param benchmark_config: The configuration for the benchmark.
|
||||
:returns: EvaluateResponse object containing generations and scores.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
|
||||
)
|
||||
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def job_status(self, benchmark_id: str, job_id: str) -> Job:
|
||||
"""Get the status of a job.
|
||||
|
||||
:param benchmark_id: The ID of the benchmark to run the evaluation on.
|
||||
:param job_id: The ID of the job to get the status of.
|
||||
:returns: The status of the evaluation job.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
|
||||
method="DELETE",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
deprecated=True,
|
||||
)
|
||||
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def job_cancel(self, benchmark_id: str, job_id: str) -> None:
|
||||
"""Cancel a job.
|
||||
|
||||
:param benchmark_id: The ID of the benchmark to run the evaluation on.
|
||||
:param job_id: The ID of the job to cancel.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result",
|
||||
method="GET",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
deprecated=True,
|
||||
)
|
||||
@webmethod(
|
||||
route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET", level=LLAMA_STACK_API_V1ALPHA
|
||||
)
|
||||
async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse:
|
||||
"""Get the result of a job.
|
||||
|
||||
:param benchmark_id: The ID of the benchmark to run the evaluation on.
|
||||
:param job_id: The ID of the job to get the result of.
|
||||
:returns: The result of the job.
|
||||
"""
|
||||
...
|
||||
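A hedged sketch of submitting and polling an eval job; eval_impl stands in for an Eval implementation, the model name is a placeholder, and SamplingParams defaults are assumed to be acceptable:

import asyncio

from llama_stack.apis.common.job_types import JobStatus
from llama_stack.apis.eval import BenchmarkConfig, EvaluateResponse, ModelCandidate
from llama_stack.apis.inference import SamplingParams

async def run_benchmark(eval_impl, benchmark_id: str) -> EvaluateResponse:
    config = BenchmarkConfig(
        eval_candidate=ModelCandidate(model="example-model", sampling_params=SamplingParams()),
        num_examples=10,
    )
    job = await eval_impl.run_eval(benchmark_id, benchmark_config=config)
    while (await eval_impl.job_status(benchmark_id, job.job_id)).status == JobStatus.in_progress:
        await asyncio.sleep(1)
    return await eval_impl.job_result(benchmark_id, job.job_id)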
7
src/llama_stack/apis/files/__init__.py
Normal file
@ -0,0 +1,7 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .files import *
|
||||
199
src/llama_stack/apis/files/files.py
Normal file
@ -0,0 +1,199 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from enum import StrEnum
|
||||
from typing import Annotated, ClassVar, Literal, Protocol, runtime_checkable
|
||||
|
||||
from fastapi import File, Form, Response, UploadFile
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.apis.common.responses import Order
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.core.telemetry.trace_protocol import trace_protocol
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
|
||||
|
||||
# OpenAI Files API Models
|
||||
class OpenAIFilePurpose(StrEnum):
|
||||
"""
|
||||
Valid purpose values for OpenAI Files API.
|
||||
"""
|
||||
|
||||
ASSISTANTS = "assistants"
|
||||
BATCH = "batch"
|
||||
# TODO: Add other purposes as needed
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class OpenAIFileObject(BaseModel):
|
||||
"""
|
||||
OpenAI File object as defined in the OpenAI Files API.
|
||||
|
||||
:param object: The object type, which is always "file"
|
||||
:param id: The file identifier, which can be referenced in the API endpoints
|
||||
:param bytes: The size of the file, in bytes
|
||||
:param created_at: The Unix timestamp (in seconds) for when the file was created
|
||||
:param expires_at: The Unix timestamp (in seconds) for when the file expires
|
||||
:param filename: The name of the file
|
||||
:param purpose: The intended purpose of the file
|
||||
"""
|
||||
|
||||
object: Literal["file"] = "file"
|
||||
id: str
|
||||
bytes: int
|
||||
created_at: int
|
||||
expires_at: int
|
||||
filename: str
|
||||
purpose: OpenAIFilePurpose
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ExpiresAfter(BaseModel):
|
||||
"""
|
||||
Control expiration of uploaded files.
|
||||
|
||||
Params:
|
||||
- anchor, must be "created_at"
|
||||
- seconds, must be int between 3600 and 2592000 (1 hour to 30 days)
|
||||
"""
|
||||
|
||||
MIN: ClassVar[int] = 3600 # 1 hour
|
||||
MAX: ClassVar[int] = 2592000 # 30 days
|
||||
|
||||
anchor: Literal["created_at"]
|
||||
seconds: int = Field(..., ge=3600, le=2592000)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ListOpenAIFileResponse(BaseModel):
|
||||
"""
|
||||
Response for listing files in OpenAI Files API.
|
||||
|
||||
:param data: List of file objects
|
||||
:param has_more: Whether there are more files available beyond this page
|
||||
:param first_id: ID of the first file in the list for pagination
|
||||
:param last_id: ID of the last file in the list for pagination
|
||||
:param object: The object type, which is always "list"
|
||||
"""
|
||||
|
||||
data: list[OpenAIFileObject]
|
||||
has_more: bool
|
||||
first_id: str
|
||||
last_id: str
|
||||
object: Literal["list"] = "list"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class OpenAIFileDeleteResponse(BaseModel):
|
||||
"""
|
||||
Response for deleting a file in OpenAI Files API.
|
||||
|
||||
:param id: The file identifier that was deleted
|
||||
:param object: The object type, which is always "file"
|
||||
:param deleted: Whether the file was successfully deleted
|
||||
"""
|
||||
|
||||
id: str
|
||||
object: Literal["file"] = "file"
|
||||
deleted: bool
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
@trace_protocol
|
||||
class Files(Protocol):
|
||||
"""Files
|
||||
|
||||
This API is used to upload documents that can be used with other Llama Stack APIs.
|
||||
"""
|
||||
|
||||
# OpenAI Files API Endpoints
|
||||
@webmethod(route="/openai/v1/files", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/files", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def openai_upload_file(
|
||||
self,
|
||||
file: Annotated[UploadFile, File()],
|
||||
purpose: Annotated[OpenAIFilePurpose, Form()],
|
||||
expires_after: Annotated[ExpiresAfter | None, Form()] = None,
|
||||
) -> OpenAIFileObject:
|
||||
"""Upload file.
|
||||
|
||||
Upload a file that can be used across various endpoints.
|
||||
|
||||
The file upload should be a multipart form request with:
|
||||
- file: The File object (not file name) to be uploaded.
|
||||
- purpose: The intended purpose of the uploaded file.
|
||||
- expires_after: Optional form values describing expiration for the file.
|
||||
|
||||
:param file: The uploaded file object containing content and metadata (filename, content_type, etc.).
|
||||
:param purpose: The intended purpose of the uploaded file (e.g., "assistants", "fine-tune").
|
||||
:param expires_after: Optional form values describing expiration for the file.
|
||||
:returns: An OpenAIFileObject representing the uploaded file.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/files", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/files", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def openai_list_files(
|
||||
self,
|
||||
after: str | None = None,
|
||||
limit: int | None = 10000,
|
||||
order: Order | None = Order.desc,
|
||||
purpose: OpenAIFilePurpose | None = None,
|
||||
) -> ListOpenAIFileResponse:
|
||||
"""List files.
|
||||
|
||||
Returns a list of files that belong to the user's organization.
|
||||
|
||||
:param after: A cursor for use in pagination. `after` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include after=obj_foo in order to fetch the next page of the list.
|
||||
:param limit: A limit on the number of objects to be returned. Limit can range between 1 and 10,000, and the default is 10,000.
|
||||
:param order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for descending order.
|
||||
:param purpose: Only return files with the given purpose.
|
||||
:returns: An ListOpenAIFileResponse containing the list of files.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def openai_retrieve_file(
|
||||
self,
|
||||
file_id: str,
|
||||
) -> OpenAIFileObject:
|
||||
"""Retrieve file.
|
||||
|
||||
Returns information about a specific file.
|
||||
|
||||
:param file_id: The ID of the file to use for this request.
|
||||
:returns: An OpenAIFileObject containing file information.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1)
|
||||
async def openai_delete_file(
|
||||
self,
|
||||
file_id: str,
|
||||
) -> OpenAIFileDeleteResponse:
|
||||
"""Delete file.
|
||||
|
||||
:param file_id: The ID of the file to use for this request.
|
||||
:returns: An OpenAIFileDeleteResponse indicating successful deletion.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/files/{file_id}/content", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/files/{file_id}/content", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def openai_retrieve_file_content(
|
||||
self,
|
||||
file_id: str,
|
||||
) -> Response:
|
||||
"""Retrieve file content.
|
||||
|
||||
Returns the contents of the specified file.
|
||||
|
||||
:param file_id: The ID of the file to use for this request.
|
||||
:returns: The raw file content as a binary response.
|
||||
"""
|
||||
...
|
||||
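For reference, a short sketch of the expiration bounds enforced by ExpiresAfter; the values below are illustrative:

from pydantic import ValidationError

from llama_stack.apis.files import ExpiresAfter, OpenAIFilePurpose

expires = ExpiresAfter(anchor="created_at", seconds=24 * 3600)   # one day: accepted

try:
    ExpiresAfter(anchor="created_at", seconds=60)                # below the 3600-second minimum
except ValidationError:
    print("rejected: outside the 1 hour to 30 day window")

print(OpenAIFilePurpose.ASSISTANTS.value)                        # "assistants"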
7
src/llama_stack/apis/inference/__init__.py
Normal file
@ -0,0 +1,7 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .inference import *
|
||||
43
src/llama_stack/apis/inference/event_logger.py
Normal file
@ -0,0 +1,43 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from termcolor import cprint
|
||||
|
||||
from llama_stack.apis.inference import (
|
||||
ChatCompletionResponseEventType,
|
||||
ChatCompletionResponseStreamChunk,
|
||||
)
|
||||
|
||||
|
||||
class LogEvent:
|
||||
def __init__(
|
||||
self,
|
||||
content: str = "",
|
||||
end: str = "\n",
|
||||
color="white",
|
||||
):
|
||||
self.content = content
|
||||
self.color = color
|
||||
self.end = "\n" if end is None else end
|
||||
|
||||
def print(self, flush=True):
|
||||
cprint(f"{self.content}", color=self.color, end=self.end, flush=flush)
|
||||
|
||||
|
||||
class EventLogger:
|
||||
async def log(self, event_generator):
|
||||
async for chunk in event_generator:
|
||||
if isinstance(chunk, ChatCompletionResponseStreamChunk):
|
||||
event = chunk.event
|
||||
if event.event_type == ChatCompletionResponseEventType.start:
|
||||
yield LogEvent("Assistant> ", color="cyan", end="")
|
||||
elif event.event_type == ChatCompletionResponseEventType.progress:
|
||||
yield LogEvent(event.delta, color="yellow", end="")
|
||||
elif event.event_type == ChatCompletionResponseEventType.complete:
|
||||
yield LogEvent("")
|
||||
else:
|
||||
yield LogEvent("Assistant> ", color="cyan", end="")
|
||||
yield LogEvent(chunk.completion_message.content, color="yellow")
|
||||
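A short sketch of driving the logger above; stream is assumed to be an async iterator of chat completion chunks as produced by the inference API:

async def print_stream(stream) -> None:
    async for log_event in EventLogger().log(stream):
        log_event.print()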
1274
src/llama_stack/apis/inference/inference.py
Normal file
File diff suppressed because it is too large
7
src/llama_stack/apis/inspect/__init__.py
Normal file
@ -0,0 +1,7 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .inspect import *
|
||||
94
src/llama_stack/apis/inspect/inspect.py
Normal file
@ -0,0 +1,94 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import Protocol, runtime_checkable
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.providers.datatypes import HealthStatus
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class RouteInfo(BaseModel):
|
||||
"""Information about an API route including its path, method, and implementing providers.
|
||||
|
||||
:param route: The API endpoint path
|
||||
:param method: HTTP method for the route
|
||||
:param provider_types: List of provider types that implement this route
|
||||
"""
|
||||
|
||||
route: str
|
||||
method: str
|
||||
provider_types: list[str]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class HealthInfo(BaseModel):
|
||||
"""Health status information for the service.
|
||||
|
||||
:param status: Current health status of the service
|
||||
"""
|
||||
|
||||
status: HealthStatus
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class VersionInfo(BaseModel):
|
||||
"""Version information for the service.
|
||||
|
||||
:param version: Version number of the service
|
||||
"""
|
||||
|
||||
version: str
|
||||
|
||||
|
||||
class ListRoutesResponse(BaseModel):
|
||||
"""Response containing a list of all available API routes.
|
||||
|
||||
:param data: List of available route information objects
|
||||
"""
|
||||
|
||||
data: list[RouteInfo]
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
class Inspect(Protocol):
|
||||
"""Inspect
|
||||
|
||||
APIs for inspecting the Llama Stack service, including its health status and the available API routes with their methods and implementing providers.
|
||||
"""
|
||||
|
||||
@webmethod(route="/inspect/routes", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_routes(self) -> ListRoutesResponse:
|
||||
"""List routes.
|
||||
|
||||
List all available API routes with their methods and implementing providers.
|
||||
|
||||
:returns: Response containing information about all available routes.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/health", method="GET", level=LLAMA_STACK_API_V1, require_authentication=False)
|
||||
async def health(self) -> HealthInfo:
|
||||
"""Get health status.
|
||||
|
||||
Get the current health status of the service.
|
||||
|
||||
:returns: Health information indicating if the service is operational.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/version", method="GET", level=LLAMA_STACK_API_V1, require_authentication=False)
|
||||
async def version(self) -> VersionInfo:
|
||||
"""Get version.
|
||||
|
||||
Get the version of the service.
|
||||
|
||||
:returns: Version information containing the service version number.
|
||||
"""
|
||||
...
|
||||
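A hedged sketch of probing these endpoints over HTTP; the base URL and the "/v1" prefix are assumptions about a locally running stack server:

```python
import requests

BASE = "http://localhost:8321/v1"  # assumed local server address and API prefix

print(requests.get(f"{BASE}/health").json())    # health status, no authentication required
print(requests.get(f"{BASE}/version").json())   # service version

for route in requests.get(f"{BASE}/inspect/routes").json()["data"]:
    print(route["method"], route["route"], route["provider_types"])
```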
7
src/llama_stack/apis/models/__init__.py
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .models import *
|
||||
171
src/llama_stack/apis/models/models.py
Normal file
|
|
@ -0,0 +1,171 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from enum import StrEnum
|
||||
from typing import Any, Literal, Protocol, runtime_checkable
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, Field, field_validator
|
||||
|
||||
from llama_stack.apis.resource import Resource, ResourceType
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.core.telemetry.trace_protocol import trace_protocol
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
|
||||
|
||||
class CommonModelFields(BaseModel):
|
||||
metadata: dict[str, Any] = Field(
|
||||
default_factory=dict,
|
||||
description="Any additional metadata for this model",
|
||||
)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ModelType(StrEnum):
|
||||
"""Enumeration of supported model types in Llama Stack.
|
||||
:cvar llm: Large language model for text generation and completion
|
||||
:cvar embedding: Embedding model for converting text to vector representations
|
||||
:cvar rerank: Reranking model for reordering documents based on their relevance to a query
|
||||
"""
|
||||
|
||||
llm = "llm"
|
||||
embedding = "embedding"
|
||||
rerank = "rerank"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class Model(CommonModelFields, Resource):
|
||||
"""A model resource representing an AI model registered in Llama Stack.
|
||||
|
||||
:param type: The resource type, always 'model' for model resources
|
||||
:param model_type: The type of model (LLM or embedding model)
|
||||
:param metadata: Any additional metadata for this model
|
||||
:param identifier: Unique identifier for this resource in llama stack
|
||||
:param provider_resource_id: Unique identifier for this resource in the provider
|
||||
:param provider_id: ID of the provider that owns this resource
|
||||
"""
|
||||
|
||||
type: Literal[ResourceType.model] = ResourceType.model
|
||||
|
||||
@property
|
||||
def model_id(self) -> str:
|
||||
return self.identifier
|
||||
|
||||
@property
|
||||
def provider_model_id(self) -> str:
|
||||
assert self.provider_resource_id is not None, "Provider resource ID must be set"
|
||||
return self.provider_resource_id
|
||||
|
||||
model_config = ConfigDict(protected_namespaces=())
|
||||
|
||||
model_type: ModelType = Field(default=ModelType.llm)
|
||||
|
||||
@field_validator("provider_resource_id")
|
||||
@classmethod
|
||||
def validate_provider_resource_id(cls, v):
|
||||
if v is None:
|
||||
raise ValueError("provider_resource_id cannot be None")
|
||||
return v
|
||||
|
||||
|
||||
class ModelInput(CommonModelFields):
|
||||
model_id: str
|
||||
provider_id: str | None = None
|
||||
provider_model_id: str | None = None
|
||||
model_type: ModelType | None = ModelType.llm
|
||||
model_config = ConfigDict(protected_namespaces=())
|
||||
|
||||
|
||||
class ListModelsResponse(BaseModel):
|
||||
data: list[Model]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class OpenAIModel(BaseModel):
|
||||
"""A model from OpenAI.
|
||||
|
||||
:param id: The ID of the model
|
||||
:param object: The object type, which will be "model"
|
||||
:param created: The Unix timestamp in seconds when the model was created
|
||||
:param owned_by: The owner of the model
|
||||
"""
|
||||
|
||||
id: str
|
||||
object: Literal["model"] = "model"
|
||||
created: int
|
||||
owned_by: str
|
||||
|
||||
|
||||
class OpenAIListModelsResponse(BaseModel):
|
||||
data: list[OpenAIModel]
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
@trace_protocol
|
||||
class Models(Protocol):
|
||||
@webmethod(route="/models", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_models(self) -> ListModelsResponse:
|
||||
"""List all models.
|
||||
|
||||
:returns: A ListModelsResponse.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/models", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
async def openai_list_models(self) -> OpenAIListModelsResponse:
|
||||
"""List models using the OpenAI API.
|
||||
|
||||
:returns: A OpenAIListModelsResponse.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/models/{model_id:path}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def get_model(
|
||||
self,
|
||||
model_id: str,
|
||||
) -> Model:
|
||||
"""Get model.
|
||||
|
||||
Get a model by its identifier.
|
||||
|
||||
:param model_id: The identifier of the model to get.
|
||||
:returns: A Model.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/models", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def register_model(
|
||||
self,
|
||||
model_id: str,
|
||||
provider_model_id: str | None = None,
|
||||
provider_id: str | None = None,
|
||||
metadata: dict[str, Any] | None = None,
|
||||
model_type: ModelType | None = None,
|
||||
) -> Model:
|
||||
"""Register model.
|
||||
|
||||
Register a model.
|
||||
|
||||
:param model_id: The identifier of the model to register.
|
||||
:param provider_model_id: The identifier of the model in the provider.
|
||||
:param provider_id: The identifier of the provider.
|
||||
:param metadata: Any additional metadata for this model.
|
||||
:param model_type: The type of model to register.
|
||||
:returns: A Model.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/models/{model_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
|
||||
async def unregister_model(
|
||||
self,
|
||||
model_id: str,
|
||||
) -> None:
|
||||
"""Unregister model.
|
||||
|
||||
Unregister a model.
|
||||
|
||||
:param model_id: The identifier of the model to unregister.
|
||||
"""
|
||||
...
|
||||
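A hedged sketch of driving the Models protocol; `models_impl` stands in for whatever object implements it in a running stack, and the identifiers are illustrative:

```python
from llama_stack.apis.models import ModelType


async def register_and_list(models_impl):
    # Register an embedding model under a stack-local identifier.
    registered = await models_impl.register_model(
        model_id="my-embedding-model",          # stack-side identifier
        provider_model_id="all-MiniLM-L6-v2",   # provider-side name (illustrative)
        provider_id="sentence-transformers",    # illustrative provider id
        metadata={"embedding_dimension": 384},
        model_type=ModelType.embedding,
    )
    print(registered.identifier, registered.model_type)

    # List everything currently registered.
    listing = await models_impl.list_models()
    print([m.identifier for m in listing.data])
```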
7
src/llama_stack/apis/post_training/__init__.py
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .post_training import *
|
||||
374
src/llama_stack/apis/post_training/post_training.py
Normal file
|
|
@ -0,0 +1,374 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Annotated, Any, Literal, Protocol
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.apis.common.content_types import URL
|
||||
from llama_stack.apis.common.job_types import JobStatus
|
||||
from llama_stack.apis.common.training_types import Checkpoint
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
|
||||
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class OptimizerType(Enum):
|
||||
"""Available optimizer algorithms for training.
|
||||
:cvar adam: Adaptive Moment Estimation optimizer
|
||||
:cvar adamw: AdamW optimizer with weight decay
|
||||
:cvar sgd: Stochastic Gradient Descent optimizer
|
||||
"""
|
||||
|
||||
adam = "adam"
|
||||
adamw = "adamw"
|
||||
sgd = "sgd"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class DatasetFormat(Enum):
|
||||
"""Format of the training dataset.
|
||||
:cvar instruct: Instruction-following format with prompt and completion
|
||||
:cvar dialog: Multi-turn conversation format with messages
|
||||
"""
|
||||
|
||||
instruct = "instruct"
|
||||
dialog = "dialog"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class DataConfig(BaseModel):
|
||||
"""Configuration for training data and data loading.
|
||||
|
||||
:param dataset_id: Unique identifier for the training dataset
|
||||
:param batch_size: Number of samples per training batch
|
||||
:param shuffle: Whether to shuffle the dataset during training
|
||||
:param data_format: Format of the dataset (instruct or dialog)
|
||||
:param validation_dataset_id: (Optional) Unique identifier for the validation dataset
|
||||
:param packed: (Optional) Whether to pack multiple samples into a single sequence for efficiency
|
||||
:param train_on_input: (Optional) Whether to compute loss on input tokens as well as output tokens
|
||||
"""
|
||||
|
||||
dataset_id: str
|
||||
batch_size: int
|
||||
shuffle: bool
|
||||
data_format: DatasetFormat
|
||||
validation_dataset_id: str | None = None
|
||||
packed: bool | None = False
|
||||
train_on_input: bool | None = False
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class OptimizerConfig(BaseModel):
|
||||
"""Configuration parameters for the optimization algorithm.
|
||||
|
||||
:param optimizer_type: Type of optimizer to use (adam, adamw, or sgd)
|
||||
:param lr: Learning rate for the optimizer
|
||||
:param weight_decay: Weight decay coefficient for regularization
|
||||
:param num_warmup_steps: Number of steps for learning rate warmup
|
||||
"""
|
||||
|
||||
optimizer_type: OptimizerType
|
||||
lr: float
|
||||
weight_decay: float
|
||||
num_warmup_steps: int
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class EfficiencyConfig(BaseModel):
|
||||
"""Configuration for memory and compute efficiency optimizations.
|
||||
|
||||
:param enable_activation_checkpointing: (Optional) Whether to use activation checkpointing to reduce memory usage
|
||||
:param enable_activation_offloading: (Optional) Whether to offload activations to CPU to save GPU memory
|
||||
:param memory_efficient_fsdp_wrap: (Optional) Whether to use memory-efficient FSDP wrapping
|
||||
:param fsdp_cpu_offload: (Optional) Whether to offload FSDP parameters to CPU
|
||||
"""
|
||||
|
||||
enable_activation_checkpointing: bool | None = False
|
||||
enable_activation_offloading: bool | None = False
|
||||
memory_efficient_fsdp_wrap: bool | None = False
|
||||
fsdp_cpu_offload: bool | None = False
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class TrainingConfig(BaseModel):
|
||||
"""Comprehensive configuration for the training process.
|
||||
|
||||
:param n_epochs: Number of training epochs to run
|
||||
:param max_steps_per_epoch: Maximum number of steps to run per epoch
|
||||
:param gradient_accumulation_steps: Number of steps to accumulate gradients before updating
|
||||
:param max_validation_steps: (Optional) Maximum number of validation steps per epoch
|
||||
:param data_config: (Optional) Configuration for data loading and formatting
|
||||
:param optimizer_config: (Optional) Configuration for the optimization algorithm
|
||||
:param efficiency_config: (Optional) Configuration for memory and compute optimizations
|
||||
:param dtype: (Optional) Data type for model parameters (bf16, fp16, fp32)
|
||||
"""
|
||||
|
||||
n_epochs: int
|
||||
max_steps_per_epoch: int = 1
|
||||
gradient_accumulation_steps: int = 1
|
||||
max_validation_steps: int | None = 1
|
||||
data_config: DataConfig | None = None
|
||||
optimizer_config: OptimizerConfig | None = None
|
||||
efficiency_config: EfficiencyConfig | None = None
|
||||
dtype: str | None = "bf16"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class LoraFinetuningConfig(BaseModel):
|
||||
"""Configuration for Low-Rank Adaptation (LoRA) fine-tuning.
|
||||
|
||||
:param type: Algorithm type identifier, always "LoRA"
|
||||
:param lora_attn_modules: List of attention module names to apply LoRA to
|
||||
:param apply_lora_to_mlp: Whether to apply LoRA to MLP layers
|
||||
:param apply_lora_to_output: Whether to apply LoRA to output projection layers
|
||||
:param rank: Rank of the LoRA adaptation (lower rank = fewer parameters)
|
||||
:param alpha: LoRA scaling parameter that controls adaptation strength
|
||||
:param use_dora: (Optional) Whether to use DoRA (Weight-Decomposed Low-Rank Adaptation)
|
||||
:param quantize_base: (Optional) Whether to quantize the base model weights
|
||||
"""
|
||||
|
||||
type: Literal["LoRA"] = "LoRA"
|
||||
lora_attn_modules: list[str]
|
||||
apply_lora_to_mlp: bool
|
||||
apply_lora_to_output: bool
|
||||
rank: int
|
||||
alpha: int
|
||||
use_dora: bool | None = False
|
||||
quantize_base: bool | None = False
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class QATFinetuningConfig(BaseModel):
|
||||
"""Configuration for Quantization-Aware Training (QAT) fine-tuning.
|
||||
|
||||
:param type: Algorithm type identifier, always "QAT"
|
||||
:param quantizer_name: Name of the quantization algorithm to use
|
||||
:param group_size: Size of groups for grouped quantization
|
||||
"""
|
||||
|
||||
type: Literal["QAT"] = "QAT"
|
||||
quantizer_name: str
|
||||
group_size: int
|
||||
|
||||
|
||||
AlgorithmConfig = Annotated[LoraFinetuningConfig | QATFinetuningConfig, Field(discriminator="type")]
|
||||
register_schema(AlgorithmConfig, name="AlgorithmConfig")
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class PostTrainingJobLogStream(BaseModel):
|
||||
"""Stream of logs from a finetuning job.
|
||||
|
||||
:param job_uuid: Unique identifier for the training job
|
||||
:param log_lines: List of log message strings from the training process
|
||||
"""
|
||||
|
||||
job_uuid: str
|
||||
log_lines: list[str]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class RLHFAlgorithm(Enum):
|
||||
"""Available reinforcement learning from human feedback algorithms.
|
||||
:cvar dpo: Direct Preference Optimization algorithm
|
||||
"""
|
||||
|
||||
dpo = "dpo"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class DPOLossType(Enum):
|
||||
sigmoid = "sigmoid"
|
||||
hinge = "hinge"
|
||||
ipo = "ipo"
|
||||
kto_pair = "kto_pair"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class DPOAlignmentConfig(BaseModel):
|
||||
"""Configuration for Direct Preference Optimization (DPO) alignment.
|
||||
|
||||
:param beta: Temperature parameter for the DPO loss
|
||||
:param loss_type: The type of loss function to use for DPO
|
||||
"""
|
||||
|
||||
beta: float
|
||||
loss_type: DPOLossType = DPOLossType.sigmoid
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class PostTrainingRLHFRequest(BaseModel):
|
||||
"""Request to finetune a model using reinforcement learning from human feedback.
|
||||
|
||||
:param job_uuid: Unique identifier for the training job
|
||||
:param finetuned_model: URL or path to the base model to fine-tune
|
||||
:param dataset_id: Unique identifier for the training dataset
|
||||
:param validation_dataset_id: Unique identifier for the validation dataset
|
||||
:param algorithm: RLHF algorithm to use for training
|
||||
:param algorithm_config: Configuration parameters for the RLHF algorithm
|
||||
:param optimizer_config: Configuration parameters for the optimization algorithm
|
||||
:param training_config: Configuration parameters for the training process
|
||||
:param hyperparam_search_config: Configuration for hyperparameter search
|
||||
:param logger_config: Configuration for training logging
|
||||
"""
|
||||
|
||||
job_uuid: str
|
||||
|
||||
finetuned_model: URL
|
||||
|
||||
dataset_id: str
|
||||
validation_dataset_id: str
|
||||
|
||||
algorithm: RLHFAlgorithm
|
||||
algorithm_config: DPOAlignmentConfig
|
||||
|
||||
optimizer_config: OptimizerConfig
|
||||
training_config: TrainingConfig
|
||||
|
||||
# TODO: define these
|
||||
hyperparam_search_config: dict[str, Any]
|
||||
logger_config: dict[str, Any]
|
||||
|
||||
|
||||
class PostTrainingJob(BaseModel):
|
||||
job_uuid: str
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class PostTrainingJobStatusResponse(BaseModel):
|
||||
"""Status of a finetuning job.
|
||||
|
||||
:param job_uuid: Unique identifier for the training job
|
||||
:param status: Current status of the training job
|
||||
:param scheduled_at: (Optional) Timestamp when the job was scheduled
|
||||
:param started_at: (Optional) Timestamp when the job execution began
|
||||
:param completed_at: (Optional) Timestamp when the job finished, if completed
|
||||
:param resources_allocated: (Optional) Information about computational resources allocated to the job
|
||||
:param checkpoints: List of model checkpoints created during training
|
||||
"""
|
||||
|
||||
job_uuid: str
|
||||
status: JobStatus
|
||||
|
||||
scheduled_at: datetime | None = None
|
||||
started_at: datetime | None = None
|
||||
completed_at: datetime | None = None
|
||||
|
||||
resources_allocated: dict[str, Any] | None = None
|
||||
|
||||
checkpoints: list[Checkpoint] = Field(default_factory=list)
|
||||
|
||||
|
||||
class ListPostTrainingJobsResponse(BaseModel):
|
||||
data: list[PostTrainingJob]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class PostTrainingJobArtifactsResponse(BaseModel):
|
||||
"""Artifacts of a finetuning job.
|
||||
|
||||
:param job_uuid: Unique identifier for the training job
|
||||
:param checkpoints: List of model checkpoints created during training
|
||||
"""
|
||||
|
||||
job_uuid: str
|
||||
checkpoints: list[Checkpoint] = Field(default_factory=list)
|
||||
|
||||
# TODO(ashwin): metrics, evals
|
||||
|
||||
|
||||
class PostTraining(Protocol):
|
||||
@webmethod(route="/post-training/supervised-fine-tune", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/post-training/supervised-fine-tune", method="POST", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def supervised_fine_tune(
|
||||
self,
|
||||
job_uuid: str,
|
||||
training_config: TrainingConfig,
|
||||
hyperparam_search_config: dict[str, Any],
|
||||
logger_config: dict[str, Any],
|
||||
model: str | None = Field(
|
||||
default=None,
|
||||
description="Model descriptor for training if not in provider config`",
|
||||
),
|
||||
checkpoint_dir: str | None = None,
|
||||
algorithm_config: AlgorithmConfig | None = None,
|
||||
) -> PostTrainingJob:
|
||||
"""Run supervised fine-tuning of a model.
|
||||
|
||||
:param job_uuid: The UUID of the job to create.
|
||||
:param training_config: The training configuration.
|
||||
:param hyperparam_search_config: The hyperparam search configuration.
|
||||
:param logger_config: The logger configuration.
|
||||
:param model: The model to fine-tune.
|
||||
:param checkpoint_dir: The directory to save checkpoint(s) to.
|
||||
:param algorithm_config: The algorithm configuration.
|
||||
:returns: A PostTrainingJob.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/post-training/preference-optimize", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/post-training/preference-optimize", method="POST", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def preference_optimize(
|
||||
self,
|
||||
job_uuid: str,
|
||||
finetuned_model: str,
|
||||
algorithm_config: DPOAlignmentConfig,
|
||||
training_config: TrainingConfig,
|
||||
hyperparam_search_config: dict[str, Any],
|
||||
logger_config: dict[str, Any],
|
||||
) -> PostTrainingJob:
|
||||
"""Run preference optimization of a model.
|
||||
|
||||
:param job_uuid: The UUID of the job to create.
|
||||
:param finetuned_model: The model to fine-tune.
|
||||
:param algorithm_config: The algorithm configuration.
|
||||
:param training_config: The training configuration.
|
||||
:param hyperparam_search_config: The hyperparam search configuration.
|
||||
:param logger_config: The logger configuration.
|
||||
:returns: A PostTrainingJob.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/post-training/jobs", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/post-training/jobs", method="GET", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def get_training_jobs(self) -> ListPostTrainingJobsResponse:
|
||||
"""Get all training jobs.
|
||||
|
||||
:returns: A ListPostTrainingJobsResponse.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/post-training/job/status", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/post-training/job/status", method="GET", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def get_training_job_status(self, job_uuid: str) -> PostTrainingJobStatusResponse:
|
||||
"""Get the status of a training job.
|
||||
|
||||
:param job_uuid: The UUID of the job to get the status of.
|
||||
:returns: A PostTrainingJobStatusResponse.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/post-training/job/cancel", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/post-training/job/cancel", method="POST", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def cancel_training_job(self, job_uuid: str) -> None:
|
||||
"""Cancel a training job.
|
||||
|
||||
:param job_uuid: The UUID of the job to cancel.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/post-training/job/artifacts", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/post-training/job/artifacts", method="GET", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def get_training_job_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse:
|
||||
"""Get the artifacts of a training job.
|
||||
|
||||
:param job_uuid: The UUID of the job to get the artifacts of.
|
||||
:returns: A PostTrainingJobArtifactsResponse.
|
||||
"""
|
||||
...
|
||||
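A hedged sketch of assembling the configuration objects above and starting a LoRA supervised fine-tune; `post_training_impl`, the dataset id, and the model descriptor are illustrative assumptions:

```python
from llama_stack.apis.post_training import (
    DataConfig,
    DatasetFormat,
    LoraFinetuningConfig,
    OptimizerConfig,
    OptimizerType,
    TrainingConfig,
)


async def start_lora_job(post_training_impl):
    training_config = TrainingConfig(
        n_epochs=1,
        max_steps_per_epoch=100,
        gradient_accumulation_steps=1,
        data_config=DataConfig(
            dataset_id="my-instruct-dataset",   # assumed to be a registered dataset
            batch_size=4,
            shuffle=True,
            data_format=DatasetFormat.instruct,
        ),
        optimizer_config=OptimizerConfig(
            optimizer_type=OptimizerType.adamw,
            lr=1e-4,
            weight_decay=0.01,
            num_warmup_steps=10,
        ),
    )
    algorithm_config = LoraFinetuningConfig(
        lora_attn_modules=["q_proj", "v_proj"],
        apply_lora_to_mlp=False,
        apply_lora_to_output=False,
        rank=8,
        alpha=16,
    )
    job = await post_training_impl.supervised_fine_tune(
        job_uuid="job-123",
        training_config=training_config,
        hyperparam_search_config={},
        logger_config={},
        model="Llama-3.2-1B-Instruct",          # illustrative model descriptor
        algorithm_config=algorithm_config,
    )
    print(job.job_uuid)
```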
9
src/llama_stack/apis/prompts/__init__.py
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .prompts import ListPromptsResponse, Prompt, Prompts
|
||||
|
||||
__all__ = ["Prompt", "Prompts", "ListPromptsResponse"]
|
||||
204
src/llama_stack/apis/prompts/prompts.py
Normal file
|
|
@ -0,0 +1,204 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import re
|
||||
import secrets
|
||||
from typing import Protocol, runtime_checkable
|
||||
|
||||
from pydantic import BaseModel, Field, field_validator, model_validator
|
||||
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.core.telemetry.trace_protocol import trace_protocol
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class Prompt(BaseModel):
|
||||
"""A prompt resource representing a stored OpenAI Compatible prompt template in Llama Stack.
|
||||
|
||||
:param prompt: The system prompt text with variable placeholders. Variables are only supported when using the Responses API.
|
||||
:param version: Version (integer starting at 1, incremented on save)
|
||||
:param prompt_id: Unique identifier formatted as 'pmpt_<48-digit-hash>'
|
||||
:param variables: List of prompt variable names that can be used in the prompt template
|
||||
:param is_default: Boolean indicating whether this version is the default version for this prompt
|
||||
"""
|
||||
|
||||
prompt: str | None = Field(default=None, description="The system prompt with variable placeholders")
|
||||
version: int = Field(description="Version (integer starting at 1, incremented on save)", ge=1)
|
||||
prompt_id: str = Field(description="Unique identifier in format 'pmpt_<48-digit-hash>'")
|
||||
variables: list[str] = Field(
|
||||
default_factory=list, description="List of variable names that can be used in the prompt template"
|
||||
)
|
||||
is_default: bool = Field(
|
||||
default=False, description="Boolean indicating whether this version is the default version"
|
||||
)
|
||||
|
||||
@field_validator("prompt_id")
|
||||
@classmethod
|
||||
def validate_prompt_id(cls, prompt_id: str) -> str:
|
||||
if not isinstance(prompt_id, str):
|
||||
raise TypeError("prompt_id must be a string in format 'pmpt_<48-digit-hash>'")
|
||||
|
||||
if not prompt_id.startswith("pmpt_"):
|
||||
raise ValueError("prompt_id must start with 'pmpt_' prefix")
|
||||
|
||||
hex_part = prompt_id[5:]
|
||||
if len(hex_part) != 48:
|
||||
raise ValueError("prompt_id must be in format 'pmpt_<48-digit-hash>' (48 lowercase hex chars)")
|
||||
|
||||
for char in hex_part:
|
||||
if char not in "0123456789abcdef":
|
||||
raise ValueError("prompt_id hex part must contain only lowercase hex characters [0-9a-f]")
|
||||
|
||||
return prompt_id
|
||||
|
||||
@field_validator("version")
|
||||
@classmethod
|
||||
def validate_version(cls, prompt_version: int) -> int:
|
||||
if prompt_version < 1:
|
||||
raise ValueError("version must be >= 1")
|
||||
return prompt_version
|
||||
|
||||
@model_validator(mode="after")
|
||||
def validate_prompt_variables(self):
|
||||
"""Validate that all variables used in the prompt are declared in the variables list."""
|
||||
if not self.prompt:
|
||||
return self
|
||||
|
||||
prompt_variables = set(re.findall(r"{{\s*(\w+)\s*}}", self.prompt))
|
||||
declared_variables = set(self.variables)
|
||||
|
||||
undeclared = prompt_variables - declared_variables
|
||||
if undeclared:
|
||||
raise ValueError(f"Prompt contains undeclared variables: {sorted(undeclared)}")
|
||||
|
||||
return self
|
||||
|
||||
@classmethod
|
||||
def generate_prompt_id(cls) -> str:
|
||||
# Generate 48 hex characters (24 bytes)
|
||||
random_bytes = secrets.token_bytes(24)
|
||||
hex_string = random_bytes.hex()
|
||||
return f"pmpt_{hex_string}"
|
||||
|
||||
|
||||
class ListPromptsResponse(BaseModel):
|
||||
"""Response model to list prompts."""
|
||||
|
||||
data: list[Prompt]
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
@trace_protocol
|
||||
class Prompts(Protocol):
|
||||
"""Prompts
|
||||
|
||||
Protocol for prompt management operations."""
|
||||
|
||||
@webmethod(route="/prompts", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_prompts(self) -> ListPromptsResponse:
|
||||
"""List all prompts.
|
||||
|
||||
:returns: A ListPromptsResponse containing all prompts.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/prompts/{prompt_id}/versions", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_prompt_versions(
|
||||
self,
|
||||
prompt_id: str,
|
||||
) -> ListPromptsResponse:
|
||||
"""List prompt versions.
|
||||
|
||||
List all versions of a specific prompt.
|
||||
|
||||
:param prompt_id: The identifier of the prompt to list versions for.
|
||||
:returns: A ListPromptsResponse containing all versions of the prompt.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/prompts/{prompt_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def get_prompt(
|
||||
self,
|
||||
prompt_id: str,
|
||||
version: int | None = None,
|
||||
) -> Prompt:
|
||||
"""Get prompt.
|
||||
|
||||
Get a prompt by its identifier and optional version.
|
||||
|
||||
:param prompt_id: The identifier of the prompt to get.
|
||||
:param version: The version of the prompt to get (defaults to latest).
|
||||
:returns: A Prompt resource.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/prompts", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def create_prompt(
|
||||
self,
|
||||
prompt: str,
|
||||
variables: list[str] | None = None,
|
||||
) -> Prompt:
|
||||
"""Create prompt.
|
||||
|
||||
Create a new prompt.
|
||||
|
||||
:param prompt: The prompt text content with variable placeholders.
|
||||
:param variables: List of variable names that can be used in the prompt template.
|
||||
:returns: The created Prompt resource.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/prompts/{prompt_id}", method="PUT", level=LLAMA_STACK_API_V1)
|
||||
async def update_prompt(
|
||||
self,
|
||||
prompt_id: str,
|
||||
prompt: str,
|
||||
version: int,
|
||||
variables: list[str] | None = None,
|
||||
set_as_default: bool = True,
|
||||
) -> Prompt:
|
||||
"""Update prompt.
|
||||
|
||||
Update an existing prompt (increments version).
|
||||
|
||||
:param prompt_id: The identifier of the prompt to update.
|
||||
:param prompt: The updated prompt text content.
|
||||
:param version: The current version of the prompt being updated.
|
||||
:param variables: Updated list of variable names that can be used in the prompt template.
|
||||
:param set_as_default: Set the new version as the default (default=True).
|
||||
:returns: The updated Prompt resource with incremented version.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/prompts/{prompt_id}", method="DELETE", level=LLAMA_STACK_API_V1)
|
||||
async def delete_prompt(
|
||||
self,
|
||||
prompt_id: str,
|
||||
) -> None:
|
||||
"""Delete prompt.
|
||||
|
||||
Delete a prompt.
|
||||
|
||||
:param prompt_id: The identifier of the prompt to delete.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/prompts/{prompt_id}/set-default-version", method="PUT", level=LLAMA_STACK_API_V1)
|
||||
async def set_default_version(
|
||||
self,
|
||||
prompt_id: str,
|
||||
version: int,
|
||||
) -> Prompt:
|
||||
"""Set prompt version.
|
||||
|
||||
Set which version of a prompt should be returned by default when calling get_prompt.
|
||||
|
||||
:param prompt_id: The identifier of the prompt.
|
||||
:param version: The version to set as default.
|
||||
:returns: The prompt with the specified version now set as default.
|
||||
"""
|
||||
...
|
||||
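A hedged sketch of constructing a Prompt locally, illustrating the id format and the variable-declaration check enforced by the validators above:

```python
from llama_stack.apis.prompts import Prompt

prompt_id = Prompt.generate_prompt_id()   # "pmpt_" followed by 48 lowercase hex chars
prompt = Prompt(
    prompt_id=prompt_id,
    prompt="Summarize {{ document }} in {{ style }} style.",
    version=1,
    variables=["document", "style"],      # must cover every {{ ... }} placeholder
)
print(prompt.prompt_id, prompt.version, prompt.is_default)

# Omitting a used placeholder from `variables` raises a validation error:
# Prompt(prompt_id=prompt_id, prompt="Hello {{ name }}", version=1, variables=[])
```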
7
src/llama_stack/apis/providers/__init__.py
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .providers import *
|
||||
69
src/llama_stack/apis/providers/providers.py
Normal file
|
|
@ -0,0 +1,69 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import Any, Protocol, runtime_checkable
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.providers.datatypes import HealthResponse
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ProviderInfo(BaseModel):
|
||||
"""Information about a registered provider including its configuration and health status.
|
||||
|
||||
:param api: The API name this provider implements
|
||||
:param provider_id: Unique identifier for the provider
|
||||
:param provider_type: The type of provider implementation
|
||||
:param config: Configuration parameters for the provider
|
||||
:param health: Current health status of the provider
|
||||
"""
|
||||
|
||||
api: str
|
||||
provider_id: str
|
||||
provider_type: str
|
||||
config: dict[str, Any]
|
||||
health: HealthResponse
|
||||
|
||||
|
||||
class ListProvidersResponse(BaseModel):
|
||||
"""Response containing a list of all available providers.
|
||||
|
||||
:param data: List of provider information objects
|
||||
"""
|
||||
|
||||
data: list[ProviderInfo]
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
class Providers(Protocol):
|
||||
"""Providers
|
||||
|
||||
Providers API for inspecting, listing, and modifying providers and their configurations.
|
||||
"""
|
||||
|
||||
@webmethod(route="/providers", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_providers(self) -> ListProvidersResponse:
|
||||
"""List providers.
|
||||
|
||||
List all available providers.
|
||||
|
||||
:returns: A ListProvidersResponse containing information about all providers.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/providers/{provider_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def inspect_provider(self, provider_id: str) -> ProviderInfo:
|
||||
"""Get provider.
|
||||
|
||||
Get detailed information about a specific provider.
|
||||
|
||||
:param provider_id: The ID of the provider to inspect.
|
||||
:returns: A ProviderInfo object containing the provider's details.
|
||||
"""
|
||||
...
|
||||
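A hedged sketch of listing and inspecting providers; `providers_impl` stands in for an object implementing the protocol above:

```python
async def show_providers(providers_impl):
    listing = await providers_impl.list_providers()
    for info in listing.data:
        print(info.api, info.provider_id, info.provider_type, info.health)

    # Drill into the first provider's configuration.
    if listing.data:
        detail = await providers_impl.inspect_provider(listing.data[0].provider_id)
        print(detail.config)
```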
37
src/llama_stack/apis/resource.py
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
|
||||
from enum import StrEnum
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class ResourceType(StrEnum):
|
||||
model = "model"
|
||||
shield = "shield"
|
||||
vector_store = "vector_store"
|
||||
dataset = "dataset"
|
||||
scoring_function = "scoring_function"
|
||||
benchmark = "benchmark"
|
||||
tool = "tool"
|
||||
tool_group = "tool_group"
|
||||
prompt = "prompt"
|
||||
|
||||
|
||||
class Resource(BaseModel):
|
||||
"""Base class for all Llama Stack resources"""
|
||||
|
||||
identifier: str = Field(description="Unique identifier for this resource in llama stack")
|
||||
|
||||
provider_resource_id: str | None = Field(
|
||||
default=None,
|
||||
description="Unique identifier for this resource in the provider",
|
||||
)
|
||||
|
||||
provider_id: str = Field(description="ID of the provider that owns this resource")
|
||||
|
||||
type: ResourceType = Field(description="Type of resource (e.g. 'model', 'shield', 'vector_store', etc.)")
|
||||
7
src/llama_stack/apis/safety/__init__.py
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .safety import *
|
||||
135
src/llama_stack/apis/safety/safety.py
Normal file
|
|
@ -0,0 +1,135 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from enum import Enum
|
||||
from typing import Any, Protocol, runtime_checkable
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.apis.inference import OpenAIMessageParam
|
||||
from llama_stack.apis.shields import Shield
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.core.telemetry.trace_protocol import trace_protocol
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ModerationObjectResults(BaseModel):
|
||||
"""A moderation object.
|
||||
:param flagged: Whether any of the below categories are flagged.
|
||||
:param categories: A list of the categories, and whether they are flagged or not.
|
||||
:param category_applied_input_types: A list of the categories along with the input type(s) that the score applies to.
|
||||
:param category_scores: A list of the categories along with their scores as predicted by model.
|
||||
"""
|
||||
|
||||
flagged: bool
|
||||
categories: dict[str, bool] | None = None
|
||||
category_applied_input_types: dict[str, list[str]] | None = None
|
||||
category_scores: dict[str, float] | None = None
|
||||
user_message: str | None = None
|
||||
metadata: dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ModerationObject(BaseModel):
|
||||
"""A moderation object.
|
||||
:param id: The unique identifier for the moderation request.
|
||||
:param model: The model used to generate the moderation results.
|
||||
:param results: A list of moderation objects
|
||||
"""
|
||||
|
||||
id: str
|
||||
model: str
|
||||
results: list[ModerationObjectResults]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ViolationLevel(Enum):
|
||||
"""Severity level of a safety violation.
|
||||
|
||||
:cvar INFO: Informational level violation that does not require action
|
||||
:cvar WARN: Warning level violation that suggests caution but allows continuation
|
||||
:cvar ERROR: Error level violation that requires blocking or intervention
|
||||
"""
|
||||
|
||||
INFO = "info"
|
||||
WARN = "warn"
|
||||
ERROR = "error"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class SafetyViolation(BaseModel):
|
||||
"""Details of a safety violation detected by content moderation.
|
||||
|
||||
:param violation_level: Severity level of the violation
|
||||
:param user_message: (Optional) Message to convey to the user about the violation
|
||||
:param metadata: Additional metadata including specific violation codes for debugging and telemetry
|
||||
"""
|
||||
|
||||
violation_level: ViolationLevel
|
||||
|
||||
# what message should you convey to the user
|
||||
user_message: str | None = None
|
||||
|
||||
# additional metadata (including specific violation codes) more for
|
||||
# debugging, telemetry
|
||||
metadata: dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class RunShieldResponse(BaseModel):
|
||||
"""Response from running a safety shield.
|
||||
|
||||
:param violation: (Optional) Safety violation detected by the shield, if any
|
||||
"""
|
||||
|
||||
violation: SafetyViolation | None = None
|
||||
|
||||
|
||||
class ShieldStore(Protocol):
|
||||
async def get_shield(self, identifier: str) -> Shield: ...
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
@trace_protocol
|
||||
class Safety(Protocol):
|
||||
"""Safety
|
||||
|
||||
OpenAI-compatible Moderations API.
|
||||
"""
|
||||
|
||||
shield_store: ShieldStore
|
||||
|
||||
@webmethod(route="/safety/run-shield", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def run_shield(
|
||||
self,
|
||||
shield_id: str,
|
||||
messages: list[OpenAIMessageParam],
|
||||
params: dict[str, Any],
|
||||
) -> RunShieldResponse:
|
||||
"""Run shield.
|
||||
|
||||
Run a shield.
|
||||
|
||||
:param shield_id: The identifier of the shield to run.
|
||||
:param messages: The messages to run the shield on.
|
||||
:param params: The parameters of the shield.
|
||||
:returns: A RunShieldResponse.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/moderations", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/moderations", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def run_moderation(self, input: str | list[str], model: str | None = None) -> ModerationObject:
|
||||
"""Create moderation.
|
||||
|
||||
Classifies if text and/or image inputs are potentially harmful.
|
||||
:param input: Input (or inputs) to classify.
|
||||
Can be a single string, an array of strings, or an array of multi-modal input objects similar to other models.
|
||||
:param model: (Optional) The content moderation model you would like to use.
|
||||
:returns: A moderation object.
|
||||
"""
|
||||
...
|
||||
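A hedged sketch of calling the moderations endpoint; `safety_impl` stands in for an object implementing the Safety protocol, and the model name is illustrative:

```python
async def moderate(safety_impl):
    moderation = await safety_impl.run_moderation(
        input=["How do I bake a cake?", "Some potentially harmful request"],
        model="llama-guard",   # illustrative moderation model identifier
    )
    for result in moderation.results:
        print(result.flagged, result.categories, result.category_scores)
```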
7
src/llama_stack/apis/scoring/__init__.py
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .scoring import *
|
||||
93
src/llama_stack/apis/scoring/scoring.py
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import Any, Protocol, runtime_checkable
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnParams
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
|
||||
# mapping of metric to value
|
||||
ScoringResultRow = dict[str, Any]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ScoringResult(BaseModel):
|
||||
"""
|
||||
A scoring result for a single row.
|
||||
|
||||
:param score_rows: The scoring result for each row. Each row is a map of column name to value.
|
||||
:param aggregated_results: Map of metric name to aggregated value
|
||||
"""
|
||||
|
||||
score_rows: list[ScoringResultRow]
|
||||
# aggregated metrics to value
|
||||
aggregated_results: dict[str, Any]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ScoreBatchResponse(BaseModel):
|
||||
"""Response from batch scoring operations on datasets.
|
||||
|
||||
:param dataset_id: (Optional) The identifier of the dataset that was scored
|
||||
:param results: A map of scoring function name to ScoringResult
|
||||
"""
|
||||
|
||||
dataset_id: str | None = None
|
||||
results: dict[str, ScoringResult]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ScoreResponse(BaseModel):
|
||||
"""
|
||||
The response from scoring.
|
||||
|
||||
:param results: A map of scoring function name to ScoringResult.
|
||||
"""
|
||||
|
||||
# each key in the dict is a scoring function name
|
||||
results: dict[str, ScoringResult]
|
||||
|
||||
|
||||
class ScoringFunctionStore(Protocol):
|
||||
def get_scoring_function(self, scoring_fn_id: str) -> ScoringFn: ...
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
class Scoring(Protocol):
|
||||
scoring_function_store: ScoringFunctionStore
|
||||
|
||||
@webmethod(route="/scoring/score-batch", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def score_batch(
|
||||
self,
|
||||
dataset_id: str,
|
||||
scoring_functions: dict[str, ScoringFnParams | None],
|
||||
save_results_dataset: bool = False,
|
||||
) -> ScoreBatchResponse:
|
||||
"""Score a batch of rows.
|
||||
|
||||
:param dataset_id: The ID of the dataset to score.
|
||||
:param scoring_functions: The scoring functions to use for the scoring.
|
||||
:param save_results_dataset: Whether to save the results to a dataset.
|
||||
:returns: A ScoreBatchResponse.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/scoring/score", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def score(
|
||||
self,
|
||||
input_rows: list[dict[str, Any]],
|
||||
scoring_functions: dict[str, ScoringFnParams | None],
|
||||
) -> ScoreResponse:
|
||||
"""Score a list of rows.
|
||||
|
||||
:param input_rows: The rows to score.
|
||||
:param scoring_functions: The scoring functions to use for the scoring.
|
||||
:returns: A ScoreResponse object containing rows and aggregated results.
|
||||
"""
|
||||
...
|
||||
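A hedged sketch of scoring a couple of rows; `scoring_impl` and the scoring-function id are assumptions, and each row maps column names to values as described above:

```python
async def score_rows(scoring_impl):
    response = await scoring_impl.score(
        input_rows=[
            {"input_query": "2 + 2", "generated_answer": "4", "expected_answer": "4"},
            {"input_query": "3 + 3", "generated_answer": "7", "expected_answer": "6"},
        ],
        scoring_functions={"basic::equality": None},   # None -> use the registered params
    )
    for fn_name, result in response.results.items():
        print(fn_name, result.aggregated_results, result.score_rows)
```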
7
src/llama_stack/apis/scoring_functions/__init__.py
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .scoring_functions import *
|
||||
208
src/llama_stack/apis/scoring_functions/scoring_functions.py
Normal file
|
|
@ -0,0 +1,208 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from enum import StrEnum
|
||||
from typing import (
|
||||
Annotated,
|
||||
Any,
|
||||
Literal,
|
||||
Protocol,
|
||||
runtime_checkable,
|
||||
)
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.apis.common.type_system import ParamType
|
||||
from llama_stack.apis.resource import Resource, ResourceType
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
|
||||
|
||||
|
||||
# Perhaps more structure can be imposed on these functions. Maybe they could be associated
|
||||
# with standard metrics so they can be rolled up?
|
||||
@json_schema_type
|
||||
class ScoringFnParamsType(StrEnum):
|
||||
"""Types of scoring function parameter configurations.
|
||||
:cvar llm_as_judge: Use an LLM model to evaluate and score responses
|
||||
:cvar regex_parser: Use regex patterns to extract and score specific parts of responses
|
||||
:cvar basic: Basic scoring with simple aggregation functions
|
||||
"""
|
||||
|
||||
llm_as_judge = "llm_as_judge"
|
||||
regex_parser = "regex_parser"
|
||||
basic = "basic"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class AggregationFunctionType(StrEnum):
|
||||
"""Types of aggregation functions for scoring results.
|
||||
:cvar average: Calculate the arithmetic mean of scores
|
||||
:cvar weighted_average: Calculate a weighted average of scores
|
||||
:cvar median: Calculate the median value of scores
|
||||
:cvar categorical_count: Count occurrences of categorical values
|
||||
:cvar accuracy: Calculate accuracy as the proportion of correct answers
|
||||
"""
|
||||
|
||||
average = "average"
|
||||
weighted_average = "weighted_average"
|
||||
median = "median"
|
||||
categorical_count = "categorical_count"
|
||||
accuracy = "accuracy"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class LLMAsJudgeScoringFnParams(BaseModel):
|
||||
"""Parameters for LLM-as-judge scoring function configuration.
|
||||
:param type: The type of scoring function parameters, always llm_as_judge
|
||||
:param judge_model: Identifier of the LLM model to use as a judge for scoring
|
||||
:param prompt_template: (Optional) Custom prompt template for the judge model
|
||||
:param judge_score_regexes: Regexes to extract the answer from generated response
|
||||
:param aggregation_functions: Aggregation functions to apply to the scores of each row
|
||||
"""
|
||||
|
||||
type: Literal[ScoringFnParamsType.llm_as_judge] = ScoringFnParamsType.llm_as_judge
|
||||
judge_model: str
|
||||
prompt_template: str | None = None
|
||||
judge_score_regexes: list[str] = Field(
|
||||
description="Regexes to extract the answer from generated response",
|
||||
default_factory=lambda: [],
|
||||
)
|
||||
aggregation_functions: list[AggregationFunctionType] = Field(
|
||||
description="Aggregation functions to apply to the scores of each row",
|
||||
default_factory=lambda: [],
|
||||
)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class RegexParserScoringFnParams(BaseModel):
|
||||
"""Parameters for regex parser scoring function configuration.
|
||||
:param type: The type of scoring function parameters, always regex_parser
|
||||
:param parsing_regexes: Regexes to extract the answer from generated response
|
||||
:param aggregation_functions: Aggregation functions to apply to the scores of each row
|
||||
"""
|
||||
|
||||
type: Literal[ScoringFnParamsType.regex_parser] = ScoringFnParamsType.regex_parser
|
||||
parsing_regexes: list[str] = Field(
|
||||
description="Regex to extract the answer from generated response",
|
||||
default_factory=lambda: [],
|
||||
)
|
||||
aggregation_functions: list[AggregationFunctionType] = Field(
|
||||
description="Aggregation functions to apply to the scores of each row",
|
||||
default_factory=lambda: [],
|
||||
)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class BasicScoringFnParams(BaseModel):
|
||||
"""Parameters for basic scoring function configuration.
|
||||
:param type: The type of scoring function parameters, always basic
|
||||
:param aggregation_functions: Aggregation functions to apply to the scores of each row
|
||||
"""
|
||||
|
||||
type: Literal[ScoringFnParamsType.basic] = ScoringFnParamsType.basic
|
||||
aggregation_functions: list[AggregationFunctionType] = Field(
|
||||
description="Aggregation functions to apply to the scores of each row",
|
||||
default_factory=list,
|
||||
)
|
||||
|
||||
|
||||
ScoringFnParams = Annotated[
|
||||
LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams,
|
||||
Field(discriminator="type"),
|
||||
]
|
||||
register_schema(ScoringFnParams, name="ScoringFnParams")
|
||||
|
||||
|
||||
class CommonScoringFnFields(BaseModel):
|
||||
description: str | None = None
|
||||
metadata: dict[str, Any] = Field(
|
||||
default_factory=dict,
|
||||
description="Any additional metadata for this definition",
|
||||
)
|
||||
return_type: ParamType = Field(
|
||||
description="The return type of the deterministic function",
|
||||
)
|
||||
params: ScoringFnParams | None = Field(
|
||||
description="The parameters for the scoring function for benchmark eval, these can be overridden for app eval",
|
||||
default=None,
|
||||
)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ScoringFn(CommonScoringFnFields, Resource):
|
||||
"""A scoring function resource for evaluating model outputs.
|
||||
:param type: The resource type, always scoring_function
|
||||
"""
|
||||
|
||||
type: Literal[ResourceType.scoring_function] = ResourceType.scoring_function
|
||||
|
||||
@property
|
||||
def scoring_fn_id(self) -> str:
|
||||
return self.identifier
|
||||
|
||||
@property
|
||||
def provider_scoring_fn_id(self) -> str | None:
|
||||
return self.provider_resource_id
|
||||
|
||||
|
||||
class ScoringFnInput(CommonScoringFnFields, BaseModel):
|
||||
scoring_fn_id: str
|
||||
provider_id: str | None = None
|
||||
provider_scoring_fn_id: str | None = None
|
||||
|
||||
|
||||
class ListScoringFunctionsResponse(BaseModel):
|
||||
data: list[ScoringFn]
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
class ScoringFunctions(Protocol):
|
||||
@webmethod(route="/scoring-functions", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_scoring_functions(self) -> ListScoringFunctionsResponse:
|
||||
"""List all scoring functions.
|
||||
|
||||
:returns: A ListScoringFunctionsResponse.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def get_scoring_function(self, scoring_fn_id: str, /) -> ScoringFn:
|
||||
"""Get a scoring function by its ID.
|
||||
|
||||
:param scoring_fn_id: The ID of the scoring function to get.
|
||||
:returns: A ScoringFn.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/scoring-functions", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def register_scoring_function(
|
||||
self,
|
||||
scoring_fn_id: str,
|
||||
description: str,
|
||||
return_type: ParamType,
|
||||
provider_scoring_fn_id: str | None = None,
|
||||
provider_id: str | None = None,
|
||||
params: ScoringFnParams | None = None,
|
||||
) -> None:
|
||||
"""Register a scoring function.
|
||||
|
||||
:param scoring_fn_id: The ID of the scoring function to register.
|
||||
:param description: The description of the scoring function.
|
||||
:param return_type: The return type of the scoring function.
|
||||
:param provider_scoring_fn_id: The ID of the provider scoring function to use for the scoring function.
|
||||
:param provider_id: The ID of the provider to use for the scoring function.
|
||||
:param params: The parameters for the scoring function for benchmark eval, these can be overridden for app eval.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
|
||||
async def unregister_scoring_function(self, scoring_fn_id: str) -> None:
|
||||
"""Unregister a scoring function.
|
||||
|
||||
:param scoring_fn_id: The ID of the scoring function to unregister.
|
||||
"""
|
||||
...
|
||||
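A hedged sketch of building the discriminated-union parameter objects defined above; the judge model id and regexes are illustrative, and the resulting objects could be passed per scoring function to Scoring.score or Scoring.score_batch:

```python
from llama_stack.apis.scoring_functions import (
    AggregationFunctionType,
    LLMAsJudgeScoringFnParams,
    RegexParserScoringFnParams,
)

judge_params = LLMAsJudgeScoringFnParams(
    judge_model="my-judge-model",   # illustrative judge model identifier
    prompt_template="Rate the answer from 0 to 10: {generated_answer}",
    judge_score_regexes=[r"(\d+)"],
    aggregation_functions=[AggregationFunctionType.average],
)

regex_params = RegexParserScoringFnParams(
    parsing_regexes=[r"Answer:\s*(.+)"],
    aggregation_functions=[AggregationFunctionType.accuracy],
)

print(judge_params.type, regex_params.type)   # llm_as_judge, regex_parser
```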
7
src/llama_stack/apis/shields/__init__.py
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .shields import *
|
||||
94
src/llama_stack/apis/shields/shields.py
Normal file
|
|
@@ -0,0 +1,94 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from typing import Any, Literal, Protocol, runtime_checkable

from pydantic import BaseModel

from llama_stack.apis.resource import Resource, ResourceType
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.core.telemetry.trace_protocol import trace_protocol
from llama_stack.schema_utils import json_schema_type, webmethod


class CommonShieldFields(BaseModel):
    params: dict[str, Any] | None = None


@json_schema_type
class Shield(CommonShieldFields, Resource):
    """A safety shield resource that can be used to check content.

    :param params: (Optional) Configuration parameters for the shield
    :param type: The resource type, always shield
    """

    type: Literal[ResourceType.shield] = ResourceType.shield

    @property
    def shield_id(self) -> str:
        return self.identifier

    @property
    def provider_shield_id(self) -> str | None:
        return self.provider_resource_id


class ShieldInput(CommonShieldFields):
    shield_id: str
    provider_id: str | None = None
    provider_shield_id: str | None = None


class ListShieldsResponse(BaseModel):
    data: list[Shield]


@runtime_checkable
@trace_protocol
class Shields(Protocol):
    @webmethod(route="/shields", method="GET", level=LLAMA_STACK_API_V1)
    async def list_shields(self) -> ListShieldsResponse:
        """List all shields.

        :returns: A ListShieldsResponse.
        """
        ...

    @webmethod(route="/shields/{identifier:path}", method="GET", level=LLAMA_STACK_API_V1)
    async def get_shield(self, identifier: str) -> Shield:
        """Get a shield by its identifier.

        :param identifier: The identifier of the shield to get.
        :returns: A Shield.
        """
        ...

    @webmethod(route="/shields", method="POST", level=LLAMA_STACK_API_V1)
    async def register_shield(
        self,
        shield_id: str,
        provider_shield_id: str | None = None,
        provider_id: str | None = None,
        params: dict[str, Any] | None = None,
    ) -> Shield:
        """Register a shield.

        :param shield_id: The identifier of the shield to register.
        :param provider_shield_id: The identifier of the shield in the provider.
        :param provider_id: The identifier of the provider.
        :param params: The parameters of the shield.
        :returns: A Shield.
        """
        ...

    @webmethod(route="/shields/{identifier:path}", method="DELETE", level=LLAMA_STACK_API_V1)
    async def unregister_shield(self, identifier: str) -> None:
        """Unregister a shield.

        :param identifier: The identifier of the shield to unregister.
        """
        ...
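A short sketch of registering and then listing a shield (illustrative; the provider id, shield id, and params are placeholders, and `impl` is any `Shields` implementation):

async def enable_content_shield(impl: Shields) -> Shield:
    # Register a shield backed by a hypothetical safety provider.
    shield = await impl.register_shield(
        shield_id="content-safety",
        provider_id="llama-guard",
        params={"excluded_categories": []},
    )
    # It should now appear in the shield listing.
    listed = await impl.list_shields()
    assert any(s.identifier == shield.identifier for s in listed.data)
    return shield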
@@ -0,0 +1,7 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .synthetic_data_generation import *
@@ -0,0 +1,77 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from enum import Enum
from typing import Any, Protocol

from pydantic import BaseModel

from llama_stack.apis.inference import Message
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.schema_utils import json_schema_type, webmethod


class FilteringFunction(Enum):
    """The type of filtering function.

    :cvar none: No filtering applied, accept all generated synthetic data
    :cvar random: Random sampling of generated data points
    :cvar top_k: Keep only the top-k highest scoring synthetic data samples
    :cvar top_p: Nucleus-style filtering, keep samples exceeding cumulative score threshold
    :cvar top_k_top_p: Combined top-k and top-p filtering strategy
    :cvar sigmoid: Apply sigmoid function for probability-based filtering
    """

    none = "none"
    random = "random"
    top_k = "top_k"
    top_p = "top_p"
    top_k_top_p = "top_k_top_p"
    sigmoid = "sigmoid"


@json_schema_type
class SyntheticDataGenerationRequest(BaseModel):
    """Request to generate synthetic data. A small batch of prompts and a filtering function

    :param dialogs: List of conversation messages to use as input for synthetic data generation
    :param filtering_function: Type of filtering to apply to generated synthetic data samples
    :param model: (Optional) The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint
    """

    dialogs: list[Message]
    filtering_function: FilteringFunction = FilteringFunction.none
    model: str | None = None


@json_schema_type
class SyntheticDataGenerationResponse(BaseModel):
    """Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold.

    :param synthetic_data: List of generated synthetic data samples that passed the filtering criteria
    :param statistics: (Optional) Statistical information about the generation process and filtering results
    """

    synthetic_data: list[dict[str, Any]]
    statistics: dict[str, Any] | None = None


class SyntheticDataGeneration(Protocol):
    @webmethod(route="/synthetic-data-generation/generate", level=LLAMA_STACK_API_V1)
    def synthetic_data_generate(
        self,
        dialogs: list[Message],
        filtering_function: FilteringFunction = FilteringFunction.none,
        model: str | None = None,
    ) -> SyntheticDataGenerationResponse:
        """Generate synthetic data based on input dialogs and apply filtering.

        :param dialogs: List of conversation messages to use as input for synthetic data generation
        :param filtering_function: Type of filtering to apply to generated synthetic data samples
        :param model: (Optional) The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint
        :returns: Response containing filtered synthetic data samples and optional statistics
        """
        ...
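An illustrative request construction for this API; `UserMessage` comes from the inference API used elsewhere in this commit, and the model identifier is a placeholder:

from llama_stack.apis.inference import UserMessage

# Build a request that keeps only the top-k highest scoring generations.
request = SyntheticDataGenerationRequest(
    dialogs=[UserMessage(content="Explain retrieval-augmented generation in one sentence.")],
    filtering_function=FilteringFunction.top_k,
    model="llama3.2:3b",
)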
7
src/llama_stack/apis/telemetry/__init__.py
Normal file
@@ -0,0 +1,7 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .telemetry import *
423
src/llama_stack/apis/telemetry/telemetry.py
Normal file
@@ -0,0 +1,423 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from datetime import datetime
from enum import Enum
from typing import (
    Annotated,
    Any,
    Literal,
    Protocol,
    runtime_checkable,
)

from pydantic import BaseModel, Field

from llama_stack.models.llama.datatypes import Primitive
from llama_stack.schema_utils import json_schema_type, register_schema

# Add this constant near the top of the file, after the imports
DEFAULT_TTL_DAYS = 7


@json_schema_type
class SpanStatus(Enum):
    """The status of a span indicating whether it completed successfully or with an error.

    :cvar OK: Span completed successfully without errors
    :cvar ERROR: Span completed with an error or failure
    """

    OK = "ok"
    ERROR = "error"


@json_schema_type
class Span(BaseModel):
    """A span representing a single operation within a trace.

    :param span_id: Unique identifier for the span
    :param trace_id: Unique identifier for the trace this span belongs to
    :param parent_span_id: (Optional) Unique identifier for the parent span, if this is a child span
    :param name: Human-readable name describing the operation this span represents
    :param start_time: Timestamp when the operation began
    :param end_time: (Optional) Timestamp when the operation finished, if completed
    :param attributes: (Optional) Key-value pairs containing additional metadata about the span
    """

    span_id: str
    trace_id: str
    parent_span_id: str | None = None
    name: str
    start_time: datetime
    end_time: datetime | None = None
    attributes: dict[str, Any] | None = Field(default_factory=lambda: {})

    def set_attribute(self, key: str, value: Any):
        if self.attributes is None:
            self.attributes = {}
        self.attributes[key] = value


@json_schema_type
class Trace(BaseModel):
    """A trace representing the complete execution path of a request across multiple operations.

    :param trace_id: Unique identifier for the trace
    :param root_span_id: Unique identifier for the root span that started this trace
    :param start_time: Timestamp when the trace began
    :param end_time: (Optional) Timestamp when the trace finished, if completed
    """

    trace_id: str
    root_span_id: str
    start_time: datetime
    end_time: datetime | None = None


@json_schema_type
class EventType(Enum):
    """The type of telemetry event being logged.

    :cvar UNSTRUCTURED_LOG: A simple log message with severity level
    :cvar STRUCTURED_LOG: A structured log event with typed payload data
    :cvar METRIC: A metric measurement with value and unit
    """

    UNSTRUCTURED_LOG = "unstructured_log"
    STRUCTURED_LOG = "structured_log"
    METRIC = "metric"


@json_schema_type
class LogSeverity(Enum):
    """The severity level of a log message.

    :cvar VERBOSE: Detailed diagnostic information for troubleshooting
    :cvar DEBUG: Debug information useful during development
    :cvar INFO: General informational messages about normal operation
    :cvar WARN: Warning messages about potentially problematic situations
    :cvar ERROR: Error messages indicating failures that don't stop execution
    :cvar CRITICAL: Critical error messages indicating severe failures
    """

    VERBOSE = "verbose"
    DEBUG = "debug"
    INFO = "info"
    WARN = "warn"
    ERROR = "error"
    CRITICAL = "critical"


class EventCommon(BaseModel):
    """Common fields shared by all telemetry events.

    :param trace_id: Unique identifier for the trace this event belongs to
    :param span_id: Unique identifier for the span this event belongs to
    :param timestamp: Timestamp when the event occurred
    :param attributes: (Optional) Key-value pairs containing additional metadata about the event
    """

    trace_id: str
    span_id: str
    timestamp: datetime
    attributes: dict[str, Primitive] | None = Field(default_factory=lambda: {})


@json_schema_type
class UnstructuredLogEvent(EventCommon):
    """An unstructured log event containing a simple text message.

    :param type: Event type identifier set to UNSTRUCTURED_LOG
    :param message: The log message text
    :param severity: The severity level of the log message
    """

    type: Literal[EventType.UNSTRUCTURED_LOG] = EventType.UNSTRUCTURED_LOG
    message: str
    severity: LogSeverity


@json_schema_type
class MetricEvent(EventCommon):
    """A metric event containing a measured value.

    :param type: Event type identifier set to METRIC
    :param metric: The name of the metric being measured
    :param value: The numeric value of the metric measurement
    :param unit: The unit of measurement for the metric value
    """

    type: Literal[EventType.METRIC] = EventType.METRIC
    metric: str  # this would be an enum
    value: int | float
    unit: str


@json_schema_type
class MetricInResponse(BaseModel):
    """A metric value included in API responses.

    :param metric: The name of the metric
    :param value: The numeric value of the metric
    :param unit: (Optional) The unit of measurement for the metric value
    """

    metric: str
    value: int | float
    unit: str | None = None


# This is a short term solution to allow inference API to return metrics
# The ideal way to do this is to have a way for all response types to include metrics
# and all metric events logged to the telemetry API to be included with the response
# To do this, we will need to augment all response types with a metrics field.
# We have hit a blocker from stainless SDK that prevents us from doing this.
# The blocker is that if we were to augment the response types that have a data field
# in them like so
# class ListModelsResponse(BaseModel):
#     metrics: Optional[List[MetricEvent]] = None
#     data: List[Models]
#     ...
# The client SDK will need to access the data by using a .data field, which is not
# ergonomic. Stainless SDK does support unwrapping the response type, but it
# requires that the response type to only have a single field.

# We will need a way in the client SDK to signal that the metrics are needed
# and if they are needed, the client SDK has to return the full response type
# without unwrapping it.


class MetricResponseMixin(BaseModel):
    """Mixin class for API responses that can include metrics.

    :param metrics: (Optional) List of metrics associated with the API response
    """

    metrics: list[MetricInResponse] | None = None


@json_schema_type
class StructuredLogType(Enum):
    """The type of structured log event payload.

    :cvar SPAN_START: Event indicating the start of a new span
    :cvar SPAN_END: Event indicating the completion of a span
    """

    SPAN_START = "span_start"
    SPAN_END = "span_end"


@json_schema_type
class SpanStartPayload(BaseModel):
    """Payload for a span start event.

    :param type: Payload type identifier set to SPAN_START
    :param name: Human-readable name describing the operation this span represents
    :param parent_span_id: (Optional) Unique identifier for the parent span, if this is a child span
    """

    type: Literal[StructuredLogType.SPAN_START] = StructuredLogType.SPAN_START
    name: str
    parent_span_id: str | None = None


@json_schema_type
class SpanEndPayload(BaseModel):
    """Payload for a span end event.

    :param type: Payload type identifier set to SPAN_END
    :param status: The final status of the span indicating success or failure
    """

    type: Literal[StructuredLogType.SPAN_END] = StructuredLogType.SPAN_END
    status: SpanStatus


StructuredLogPayload = Annotated[
    SpanStartPayload | SpanEndPayload,
    Field(discriminator="type"),
]
register_schema(StructuredLogPayload, name="StructuredLogPayload")


@json_schema_type
class StructuredLogEvent(EventCommon):
    """A structured log event containing typed payload data.

    :param type: Event type identifier set to STRUCTURED_LOG
    :param payload: The structured payload data for the log event
    """

    type: Literal[EventType.STRUCTURED_LOG] = EventType.STRUCTURED_LOG
    payload: StructuredLogPayload


Event = Annotated[
    UnstructuredLogEvent | MetricEvent | StructuredLogEvent,
    Field(discriminator="type"),
]
register_schema(Event, name="Event")
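A sketch of how the discriminated `Event` union is populated for span lifecycle events; the trace/span identifiers and operation name are placeholders, and the pydantic discriminator resolves on the `type` field:

from datetime import UTC, datetime

# Mark the start and end of one operation within an existing trace.
start = StructuredLogEvent(
    trace_id="trace-123",
    span_id="span-456",
    timestamp=datetime.now(UTC),
    payload=SpanStartPayload(name="inference.chat_completion"),
)
end = StructuredLogEvent(
    trace_id="trace-123",
    span_id="span-456",
    timestamp=datetime.now(UTC),
    payload=SpanEndPayload(status=SpanStatus.OK),
)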
@json_schema_type
class EvalTrace(BaseModel):
    """A trace record for evaluation purposes.

    :param session_id: Unique identifier for the evaluation session
    :param step: The evaluation step or phase identifier
    :param input: The input data for the evaluation
    :param output: The actual output produced during evaluation
    :param expected_output: The expected output for comparison during evaluation
    """

    session_id: str
    step: str
    input: str
    output: str
    expected_output: str


@json_schema_type
class SpanWithStatus(Span):
    """A span that includes status information.

    :param status: (Optional) The current status of the span
    """

    status: SpanStatus | None = None


@json_schema_type
class QueryConditionOp(Enum):
    """Comparison operators for query conditions.

    :cvar EQ: Equal to comparison
    :cvar NE: Not equal to comparison
    :cvar GT: Greater than comparison
    :cvar LT: Less than comparison
    """

    EQ = "eq"
    NE = "ne"
    GT = "gt"
    LT = "lt"


@json_schema_type
class QueryCondition(BaseModel):
    """A condition for filtering query results.

    :param key: The attribute key to filter on
    :param op: The comparison operator to apply
    :param value: The value to compare against
    """

    key: str
    op: QueryConditionOp
    value: Any


class QueryTracesResponse(BaseModel):
    """Response containing a list of traces.

    :param data: List of traces matching the query criteria
    """

    data: list[Trace]


class QuerySpansResponse(BaseModel):
    """Response containing a list of spans.

    :param data: List of spans matching the query criteria
    """

    data: list[Span]


class QuerySpanTreeResponse(BaseModel):
    """Response containing a tree structure of spans.

    :param data: Dictionary mapping span IDs to spans with status information
    """

    data: dict[str, SpanWithStatus]


class MetricQueryType(Enum):
    """The type of metric query to perform.

    :cvar RANGE: Query metrics over a time range
    :cvar INSTANT: Query metrics at a specific point in time
    """

    RANGE = "range"
    INSTANT = "instant"


class MetricLabelOperator(Enum):
    """Operators for matching metric labels.

    :cvar EQUALS: Label value must equal the specified value
    :cvar NOT_EQUALS: Label value must not equal the specified value
    :cvar REGEX_MATCH: Label value must match the specified regular expression
    :cvar REGEX_NOT_MATCH: Label value must not match the specified regular expression
    """

    EQUALS = "="
    NOT_EQUALS = "!="
    REGEX_MATCH = "=~"
    REGEX_NOT_MATCH = "!~"


class MetricLabelMatcher(BaseModel):
    """A matcher for filtering metrics by label values.

    :param name: The name of the label to match
    :param value: The value to match against
    :param operator: The comparison operator to use for matching
    """

    name: str
    value: str
    operator: MetricLabelOperator = MetricLabelOperator.EQUALS


@json_schema_type
class MetricLabel(BaseModel):
    """A label associated with a metric.

    :param name: The name of the label
    :param value: The value of the label
    """

    name: str
    value: str


@json_schema_type
class MetricDataPoint(BaseModel):
    """A single data point in a metric time series.

    :param timestamp: Unix timestamp when the metric value was recorded
    :param value: The numeric value of the metric at this timestamp
    :param unit: The unit of measurement for the metric value
    """

    timestamp: int
    value: float
    unit: str


@json_schema_type
class MetricSeries(BaseModel):
    """A time series of metric data points.

    :param metric: The name of the metric
    :param labels: List of labels associated with this metric series
    :param values: List of data points in chronological order
    """

    metric: str
    labels: list[MetricLabel]
    values: list[MetricDataPoint]


class QueryMetricsResponse(BaseModel):
    """Response containing metric time series data.

    :param data: List of metric series matching the query criteria
    """

    data: list[MetricSeries]


@runtime_checkable
class Telemetry(Protocol):
    async def log_event(
        self,
        event: Event,
        ttl_seconds: int = DEFAULT_TTL_DAYS * 86400,
    ) -> None:
        """Log an event.

        :param event: The event to log.
        :param ttl_seconds: The time to live of the event.
        """
        ...
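A hypothetical caller of `Telemetry.log_event` recording a metric; `telemetry` is any implementation of the protocol, and the metric name and unit are illustrative:

from datetime import UTC, datetime


async def record_tokens(telemetry: Telemetry) -> None:
    # Log a single metric sample with the default seven-day retention.
    await telemetry.log_event(
        MetricEvent(
            trace_id="trace-123",
            span_id="span-456",
            timestamp=datetime.now(UTC),
            metric="prompt_tokens",
            value=128,
            unit="tokens",
        ),
        ttl_seconds=DEFAULT_TTL_DAYS * 86400,
    )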
8
src/llama_stack/apis/tools/__init__.py
Normal file
@@ -0,0 +1,8 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .rag_tool import *
from .tools import *
218
src/llama_stack/apis/tools/rag_tool.py
Normal file
@@ -0,0 +1,218 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from enum import Enum, StrEnum
from typing import Annotated, Any, Literal, Protocol

from pydantic import BaseModel, Field, field_validator
from typing_extensions import runtime_checkable

from llama_stack.apis.common.content_types import URL, InterleavedContent
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.core.telemetry.trace_protocol import trace_protocol
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod


@json_schema_type
class RRFRanker(BaseModel):
    """
    Reciprocal Rank Fusion (RRF) ranker configuration.

    :param type: The type of ranker, always "rrf"
    :param impact_factor: The impact factor for RRF scoring. Higher values give more weight to higher-ranked results.
        Must be greater than 0
    """

    type: Literal["rrf"] = "rrf"
    impact_factor: float = Field(default=60.0, gt=0.0)  # default of 60 for optimal performance


@json_schema_type
class WeightedRanker(BaseModel):
    """
    Weighted ranker configuration that combines vector and keyword scores.

    :param type: The type of ranker, always "weighted"
    :param alpha: Weight factor between 0 and 1.
        0 means only use keyword scores,
        1 means only use vector scores,
        values in between blend both scores.
    """

    type: Literal["weighted"] = "weighted"
    alpha: float = Field(
        default=0.5,
        ge=0.0,
        le=1.0,
        description="Weight factor between 0 and 1. 0 means only keyword scores, 1 means only vector scores.",
    )


Ranker = Annotated[
    RRFRanker | WeightedRanker,
    Field(discriminator="type"),
]
register_schema(Ranker, name="Ranker")


@json_schema_type
class RAGDocument(BaseModel):
    """
    A document to be used for document ingestion in the RAG Tool.

    :param document_id: The unique identifier for the document.
    :param content: The content of the document.
    :param mime_type: The MIME type of the document.
    :param metadata: Additional metadata for the document.
    """

    document_id: str
    content: InterleavedContent | URL
    mime_type: str | None = None
    metadata: dict[str, Any] = Field(default_factory=dict)


@json_schema_type
class RAGQueryResult(BaseModel):
    """Result of a RAG query containing retrieved content and metadata.

    :param content: (Optional) The retrieved content from the query
    :param metadata: Additional metadata about the query result
    """

    content: InterleavedContent | None = None
    metadata: dict[str, Any] = Field(default_factory=dict)


@json_schema_type
class RAGQueryGenerator(Enum):
    """Types of query generators for RAG systems.

    :cvar default: Default query generator using simple text processing
    :cvar llm: LLM-based query generator for enhanced query understanding
    :cvar custom: Custom query generator implementation
    """

    default = "default"
    llm = "llm"
    custom = "custom"


@json_schema_type
class RAGSearchMode(StrEnum):
    """
    Search modes for RAG query retrieval:
    - VECTOR: Uses vector similarity search for semantic matching
    - KEYWORD: Uses keyword-based search for exact matching
    - HYBRID: Combines both vector and keyword search for better results
    """

    VECTOR = "vector"
    KEYWORD = "keyword"
    HYBRID = "hybrid"


@json_schema_type
class DefaultRAGQueryGeneratorConfig(BaseModel):
    """Configuration for the default RAG query generator.

    :param type: Type of query generator, always 'default'
    :param separator: String separator used to join query terms
    """

    type: Literal["default"] = "default"
    separator: str = " "


@json_schema_type
class LLMRAGQueryGeneratorConfig(BaseModel):
    """Configuration for the LLM-based RAG query generator.

    :param type: Type of query generator, always 'llm'
    :param model: Name of the language model to use for query generation
    :param template: Template string for formatting the query generation prompt
    """

    type: Literal["llm"] = "llm"
    model: str
    template: str


RAGQueryGeneratorConfig = Annotated[
    DefaultRAGQueryGeneratorConfig | LLMRAGQueryGeneratorConfig,
    Field(discriminator="type"),
]
register_schema(RAGQueryGeneratorConfig, name="RAGQueryGeneratorConfig")


@json_schema_type
class RAGQueryConfig(BaseModel):
    """
    Configuration for the RAG query generation.

    :param query_generator_config: Configuration for the query generator.
    :param max_tokens_in_context: Maximum number of tokens in the context.
    :param max_chunks: Maximum number of chunks to retrieve.
    :param chunk_template: Template for formatting each retrieved chunk in the context.
        Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk content string), {metadata} (chunk metadata dict).
        Default: "Result {index}\\nContent: {chunk.content}\\nMetadata: {metadata}\\n"
    :param mode: Search mode for retrieval—either "vector", "keyword", or "hybrid". Default "vector".
    :param ranker: Configuration for the ranker to use in hybrid search. Defaults to RRF ranker.
    """

    # This config defines how a query is generated using the messages
    # for memory bank retrieval.
    query_generator_config: RAGQueryGeneratorConfig = Field(default=DefaultRAGQueryGeneratorConfig())
    max_tokens_in_context: int = 4096
    max_chunks: int = 5
    chunk_template: str = "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n"
    mode: RAGSearchMode | None = RAGSearchMode.VECTOR
    ranker: Ranker | None = Field(default=None)  # Only used for hybrid mode

    @field_validator("chunk_template")
    def validate_chunk_template(cls, v: str) -> str:
        if "{chunk.content}" not in v:
            raise ValueError("chunk_template must contain {chunk.content}")
        if "{index}" not in v:
            raise ValueError("chunk_template must contain {index}")
        if len(v) == 0:
            raise ValueError("chunk_template must not be empty")
        return v


@runtime_checkable
@trace_protocol
class RAGToolRuntime(Protocol):
    @webmethod(route="/tool-runtime/rag-tool/insert", method="POST", level=LLAMA_STACK_API_V1)
    async def insert(
        self,
        documents: list[RAGDocument],
        vector_db_id: str,
        chunk_size_in_tokens: int = 512,
    ) -> None:
        """Index documents so they can be used by the RAG system.

        :param documents: List of documents to index in the RAG system
        :param vector_db_id: ID of the vector database to store the document embeddings
        :param chunk_size_in_tokens: (Optional) Size in tokens for document chunking during indexing
        """
        ...

    @webmethod(route="/tool-runtime/rag-tool/query", method="POST", level=LLAMA_STACK_API_V1)
    async def query(
        self,
        content: InterleavedContent,
        vector_db_ids: list[str],
        query_config: RAGQueryConfig | None = None,
    ) -> RAGQueryResult:
        """Query the RAG system for context; typically invoked by the agent.

        :param content: The query content to search for in the indexed documents
        :param vector_db_ids: List of vector database IDs to search within
        :param query_config: (Optional) Configuration parameters for the query operation
        :returns: RAGQueryResult containing the retrieved content and metadata
        """
        ...
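A sketch of a hybrid-mode RAG query against this runtime protocol; the vector DB id and question are placeholders, and `rag` is any `RAGToolRuntime` implementation. Hybrid mode with a `WeightedRanker` is one valid configuration; the default is pure vector search.

async def ask(rag: RAGToolRuntime) -> RAGQueryResult:
    # Blend keyword and vector scores, favouring vector similarity (alpha=0.7).
    config = RAGQueryConfig(
        max_chunks=3,
        mode=RAGSearchMode.HYBRID,
        ranker=WeightedRanker(alpha=0.7),
    )
    return await rag.query(
        content="What does the telemetry API log?",
        vector_db_ids=["docs-index"],
        query_config=config,
    )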
221
src/llama_stack/apis/tools/tools.py
Normal file
@@ -0,0 +1,221 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from enum import Enum
from typing import Any, Literal, Protocol

from pydantic import BaseModel
from typing_extensions import runtime_checkable

from llama_stack.apis.common.content_types import URL, InterleavedContent
from llama_stack.apis.resource import Resource, ResourceType
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.core.telemetry.trace_protocol import trace_protocol
from llama_stack.schema_utils import json_schema_type, webmethod

from .rag_tool import RAGToolRuntime


@json_schema_type
class ToolDef(BaseModel):
    """Tool definition used in runtime contexts.

    :param name: Name of the tool
    :param description: (Optional) Human-readable description of what the tool does
    :param input_schema: (Optional) JSON Schema for tool inputs (MCP inputSchema)
    :param output_schema: (Optional) JSON Schema for tool outputs (MCP outputSchema)
    :param metadata: (Optional) Additional metadata about the tool
    :param toolgroup_id: (Optional) ID of the tool group this tool belongs to
    """

    toolgroup_id: str | None = None
    name: str
    description: str | None = None
    input_schema: dict[str, Any] | None = None
    output_schema: dict[str, Any] | None = None
    metadata: dict[str, Any] | None = None


@json_schema_type
class ToolGroupInput(BaseModel):
    """Input data for registering a tool group.

    :param toolgroup_id: Unique identifier for the tool group
    :param provider_id: ID of the provider that will handle this tool group
    :param args: (Optional) Additional arguments to pass to the provider
    :param mcp_endpoint: (Optional) Model Context Protocol endpoint for remote tools
    """

    toolgroup_id: str
    provider_id: str
    args: dict[str, Any] | None = None
    mcp_endpoint: URL | None = None


@json_schema_type
class ToolGroup(Resource):
    """A group of related tools managed together.

    :param type: Type of resource, always 'tool_group'
    :param mcp_endpoint: (Optional) Model Context Protocol endpoint for remote tools
    :param args: (Optional) Additional arguments for the tool group
    """

    type: Literal[ResourceType.tool_group] = ResourceType.tool_group
    mcp_endpoint: URL | None = None
    args: dict[str, Any] | None = None


@json_schema_type
class ToolInvocationResult(BaseModel):
    """Result of a tool invocation.

    :param content: (Optional) The output content from the tool execution
    :param error_message: (Optional) Error message if the tool execution failed
    :param error_code: (Optional) Numeric error code if the tool execution failed
    :param metadata: (Optional) Additional metadata about the tool execution
    """

    content: InterleavedContent | None = None
    error_message: str | None = None
    error_code: int | None = None
    metadata: dict[str, Any] | None = None


class ToolStore(Protocol):
    async def get_tool(self, tool_name: str) -> ToolDef: ...
    async def get_tool_group(self, toolgroup_id: str) -> ToolGroup: ...


class ListToolGroupsResponse(BaseModel):
    """Response containing a list of tool groups.

    :param data: List of tool groups
    """

    data: list[ToolGroup]


class ListToolDefsResponse(BaseModel):
    """Response containing a list of tool definitions.

    :param data: List of tool definitions
    """

    data: list[ToolDef]


@runtime_checkable
@trace_protocol
class ToolGroups(Protocol):
    @webmethod(route="/toolgroups", method="POST", level=LLAMA_STACK_API_V1)
    async def register_tool_group(
        self,
        toolgroup_id: str,
        provider_id: str,
        mcp_endpoint: URL | None = None,
        args: dict[str, Any] | None = None,
    ) -> None:
        """Register a tool group.

        :param toolgroup_id: The ID of the tool group to register.
        :param provider_id: The ID of the provider to use for the tool group.
        :param mcp_endpoint: The MCP endpoint to use for the tool group.
        :param args: A dictionary of arguments to pass to the tool group.
        """
        ...

    @webmethod(route="/toolgroups/{toolgroup_id:path}", method="GET", level=LLAMA_STACK_API_V1)
    async def get_tool_group(
        self,
        toolgroup_id: str,
    ) -> ToolGroup:
        """Get a tool group by its ID.

        :param toolgroup_id: The ID of the tool group to get.
        :returns: A ToolGroup.
        """
        ...

    @webmethod(route="/toolgroups", method="GET", level=LLAMA_STACK_API_V1)
    async def list_tool_groups(self) -> ListToolGroupsResponse:
        """List tool groups with optional provider.

        :returns: A ListToolGroupsResponse.
        """
        ...

    @webmethod(route="/tools", method="GET", level=LLAMA_STACK_API_V1)
    async def list_tools(self, toolgroup_id: str | None = None) -> ListToolDefsResponse:
        """List tools with optional tool group.

        :param toolgroup_id: The ID of the tool group to list tools for.
        :returns: A ListToolDefsResponse.
        """
        ...

    @webmethod(route="/tools/{tool_name:path}", method="GET", level=LLAMA_STACK_API_V1)
    async def get_tool(
        self,
        tool_name: str,
    ) -> ToolDef:
        """Get a tool by its name.

        :param tool_name: The name of the tool to get.
        :returns: A ToolDef.
        """
        ...

    @webmethod(route="/toolgroups/{toolgroup_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
    async def unregister_toolgroup(
        self,
        toolgroup_id: str,
    ) -> None:
        """Unregister a tool group.

        :param toolgroup_id: The ID of the tool group to unregister.
        """
        ...


class SpecialToolGroup(Enum):
    """Special tool groups with predefined functionality.

    :cvar rag_tool: Retrieval-Augmented Generation tool group for document search and retrieval
    """

    rag_tool = "rag_tool"


@runtime_checkable
@trace_protocol
class ToolRuntime(Protocol):
    tool_store: ToolStore | None = None

    rag_tool: RAGToolRuntime | None = None

    # TODO: This needs to be renamed once OPEN API generator name conflict issue is fixed.
    @webmethod(route="/tool-runtime/list-tools", method="GET", level=LLAMA_STACK_API_V1)
    async def list_runtime_tools(
        self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None
    ) -> ListToolDefsResponse:
        """List all tools in the runtime.

        :param tool_group_id: The ID of the tool group to list tools for.
        :param mcp_endpoint: The MCP endpoint to use for the tool group.
        :returns: A ListToolDefsResponse.
        """
        ...

    @webmethod(route="/tool-runtime/invoke", method="POST", level=LLAMA_STACK_API_V1)
    async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult:
        """Run a tool with the given arguments.

        :param tool_name: The name of the tool to invoke.
        :param kwargs: A dictionary of arguments to pass to the tool.
        :returns: A ToolInvocationResult.
        """
        ...
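A sketch of a `ToolDef` carrying a JSON Schema input contract, plus a hypothetical invocation through `ToolRuntime.invoke_tool`; the tool name, schema, and arguments are illustrative, and `runtime` is any implementation of the protocol:

weather_tool = ToolDef(
    name="get_weather",
    description="Look up current weather for a city.",
    input_schema={
        "type": "object",
        "properties": {"city": {"type": "string"}},
        "required": ["city"],
    },
)


async def call_weather(runtime: ToolRuntime) -> ToolInvocationResult:
    # kwargs must satisfy the tool's input_schema.
    return await runtime.invoke_tool(tool_name=weather_tool.name, kwargs={"city": "Tokyo"})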
7
src/llama_stack/apis/vector_io/__init__.py
Normal file
@@ -0,0 +1,7 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .vector_io import *
960
src/llama_stack/apis/vector_io/vector_io.py
Normal file
@@ -0,0 +1,960 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import uuid
from typing import Annotated, Any, Literal, Protocol, runtime_checkable

from fastapi import Body
from pydantic import BaseModel, Field

from llama_stack.apis.inference import InterleavedContent
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.core.telemetry.trace_protocol import trace_protocol
from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
from llama_stack.schema_utils import json_schema_type, webmethod
from llama_stack.strong_typing.schema import register_schema


@json_schema_type
class ChunkMetadata(BaseModel):
    """
    `ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional information about the chunk that
    will not be used in the context during inference, but is required for backend functionality. The `ChunkMetadata`
    is set during chunk creation in `MemoryToolRuntimeImpl().insert()` and is not expected to change after.
    Use `Chunk.metadata` for metadata that will be used in the context during inference.
    :param chunk_id: The ID of the chunk. If not set, it will be generated based on the document ID and content.
    :param document_id: The ID of the document this chunk belongs to.
    :param source: The source of the content, such as a URL, file path, or other identifier.
    :param created_timestamp: An optional timestamp indicating when the chunk was created.
    :param updated_timestamp: An optional timestamp indicating when the chunk was last updated.
    :param chunk_window: The window of the chunk, which can be used to group related chunks together.
    :param chunk_tokenizer: The tokenizer used to create the chunk. Default is Tiktoken.
    :param chunk_embedding_model: The embedding model used to create the chunk's embedding.
    :param chunk_embedding_dimension: The dimension of the embedding vector for the chunk.
    :param content_token_count: The number of tokens in the content of the chunk.
    :param metadata_token_count: The number of tokens in the metadata of the chunk.
    """

    chunk_id: str | None = None
    document_id: str | None = None
    source: str | None = None
    created_timestamp: int | None = None
    updated_timestamp: int | None = None
    chunk_window: str | None = None
    chunk_tokenizer: str | None = None
    chunk_embedding_model: str | None = None
    chunk_embedding_dimension: int | None = None
    content_token_count: int | None = None
    metadata_token_count: int | None = None


@json_schema_type
class Chunk(BaseModel):
    """
    A chunk of content that can be inserted into a vector database.
    :param content: The content of the chunk, which can be interleaved text, images, or other types.
    :param embedding: Optional embedding for the chunk. If not provided, it will be computed later.
    :param metadata: Metadata associated with the chunk that will be used in the model context during inference.
    :param stored_chunk_id: The chunk ID that is stored in the vector database. Used for backend functionality.
    :param chunk_metadata: Metadata for the chunk that will NOT be used in the context during inference.
        The `chunk_metadata` is required for backend functionality.
    """

    content: InterleavedContent
    metadata: dict[str, Any] = Field(default_factory=dict)
    embedding: list[float] | None = None
    # The alias parameter serializes the field as "chunk_id" in JSON but keeps the internal name as "stored_chunk_id"
    stored_chunk_id: str | None = Field(default=None, alias="chunk_id")
    chunk_metadata: ChunkMetadata | None = None

    model_config = {"populate_by_name": True}

    def model_post_init(self, __context):
        # Extract chunk_id from metadata if present
        if self.metadata and "chunk_id" in self.metadata:
            self.stored_chunk_id = self.metadata.pop("chunk_id")

    @property
    def chunk_id(self) -> str:
        """Returns the chunk ID, which is either an input `chunk_id` or a generated one if not set."""
        if self.stored_chunk_id:
            return self.stored_chunk_id

        if "document_id" in self.metadata:
            return generate_chunk_id(self.metadata["document_id"], str(self.content))

        return generate_chunk_id(str(uuid.uuid4()), str(self.content))

    @property
    def document_id(self) -> str | None:
        """Returns the document_id from either metadata or chunk_metadata, with metadata taking precedence."""
        # Check metadata first (takes precedence)
        doc_id = self.metadata.get("document_id")
        if doc_id is not None:
            if not isinstance(doc_id, str):
                raise TypeError(f"metadata['document_id'] must be a string, got {type(doc_id).__name__}: {doc_id!r}")
            return doc_id

        # Fall back to chunk_metadata if available (Pydantic ensures type safety)
        if self.chunk_metadata is not None:
            return self.chunk_metadata.document_id

        return None
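An illustrative `Chunk` construction showing how `chunk_id` is derived when no explicit id is supplied; the document id, source, and content are placeholders:

chunk = Chunk(
    content="Llama Stack exposes a vector IO API.",
    metadata={"document_id": "doc-1", "source": "README.md"},
)
assert chunk.document_id == "doc-1"
# With a document_id in metadata, the id is deterministic for the same (document_id, content) pair.
print(chunk.chunk_id)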
@json_schema_type
class QueryChunksResponse(BaseModel):
    """Response from querying chunks in a vector database.

    :param chunks: List of content chunks returned from the query
    :param scores: Relevance scores corresponding to each returned chunk
    """

    chunks: list[Chunk]
    scores: list[float]


@json_schema_type
class VectorStoreFileCounts(BaseModel):
    """File processing status counts for a vector store.

    :param completed: Number of files that have been successfully processed
    :param cancelled: Number of files that had their processing cancelled
    :param failed: Number of files that failed to process
    :param in_progress: Number of files currently being processed
    :param total: Total number of files in the vector store
    """

    completed: int
    cancelled: int
    failed: int
    in_progress: int
    total: int


# TODO: rename this as OpenAIVectorStore
@json_schema_type
class VectorStoreObject(BaseModel):
    """OpenAI Vector Store object.

    :param id: Unique identifier for the vector store
    :param object: Object type identifier, always "vector_store"
    :param created_at: Timestamp when the vector store was created
    :param name: (Optional) Name of the vector store
    :param usage_bytes: Storage space used by the vector store in bytes
    :param file_counts: File processing status counts for the vector store
    :param status: Current status of the vector store
    :param expires_after: (Optional) Expiration policy for the vector store
    :param expires_at: (Optional) Timestamp when the vector store will expire
    :param last_active_at: (Optional) Timestamp of last activity on the vector store
    :param metadata: Set of key-value pairs that can be attached to the vector store
    """

    id: str
    object: str = "vector_store"
    created_at: int
    name: str | None = None
    usage_bytes: int = 0
    file_counts: VectorStoreFileCounts
    status: str = "completed"
    expires_after: dict[str, Any] | None = None
    expires_at: int | None = None
    last_active_at: int | None = None
    metadata: dict[str, Any] = Field(default_factory=dict)


@json_schema_type
class VectorStoreCreateRequest(BaseModel):
    """Request to create a vector store.

    :param name: (Optional) Name for the vector store
    :param file_ids: List of file IDs to include in the vector store
    :param expires_after: (Optional) Expiration policy for the vector store
    :param chunking_strategy: (Optional) Strategy for splitting files into chunks
    :param metadata: Set of key-value pairs that can be attached to the vector store
    """

    name: str | None = None
    file_ids: list[str] = Field(default_factory=list)
    expires_after: dict[str, Any] | None = None
    chunking_strategy: dict[str, Any] | None = None
    metadata: dict[str, Any] = Field(default_factory=dict)


@json_schema_type
class VectorStoreModifyRequest(BaseModel):
    """Request to modify a vector store.

    :param name: (Optional) Updated name for the vector store
    :param expires_after: (Optional) Updated expiration policy for the vector store
    :param metadata: (Optional) Updated set of key-value pairs for the vector store
    """

    name: str | None = None
    expires_after: dict[str, Any] | None = None
    metadata: dict[str, Any] | None = None


@json_schema_type
class VectorStoreListResponse(BaseModel):
    """Response from listing vector stores.

    :param object: Object type identifier, always "list"
    :param data: List of vector store objects
    :param first_id: (Optional) ID of the first vector store in the list for pagination
    :param last_id: (Optional) ID of the last vector store in the list for pagination
    :param has_more: Whether there are more vector stores available beyond this page
    """

    object: str = "list"
    data: list[VectorStoreObject]
    first_id: str | None = None
    last_id: str | None = None
    has_more: bool = False


@json_schema_type
class VectorStoreSearchRequest(BaseModel):
    """Request to search a vector store.

    :param query: Search query as a string or list of strings
    :param filters: (Optional) Filters based on file attributes to narrow search results
    :param max_num_results: Maximum number of results to return, defaults to 10
    :param ranking_options: (Optional) Options for ranking and filtering search results
    :param rewrite_query: Whether to rewrite the query for better vector search performance
    """

    query: str | list[str]
    filters: dict[str, Any] | None = None
    max_num_results: int = 10
    ranking_options: dict[str, Any] | None = None
    rewrite_query: bool = False


@json_schema_type
class VectorStoreContent(BaseModel):
    """Content item from a vector store file or search result.

    :param type: Content type, currently only "text" is supported
    :param text: The actual text content
    """

    type: Literal["text"]
    text: str


@json_schema_type
class VectorStoreSearchResponse(BaseModel):
    """Response from searching a vector store.

    :param file_id: Unique identifier of the file containing the result
    :param filename: Name of the file containing the result
    :param score: Relevance score for this search result
    :param attributes: (Optional) Key-value attributes associated with the file
    :param content: List of content items matching the search query
    """

    file_id: str
    filename: str
    score: float
    attributes: dict[str, str | float | bool] | None = None
    content: list[VectorStoreContent]


@json_schema_type
class VectorStoreSearchResponsePage(BaseModel):
    """Paginated response from searching a vector store.

    :param object: Object type identifier for the search results page
    :param search_query: The original search query that was executed
    :param data: List of search result objects
    :param has_more: Whether there are more results available beyond this page
    :param next_page: (Optional) Token for retrieving the next page of results
    """

    object: str = "vector_store.search_results.page"
    search_query: str
    data: list[VectorStoreSearchResponse]
    has_more: bool = False
    next_page: str | None = None


@json_schema_type
class VectorStoreDeleteResponse(BaseModel):
    """Response from deleting a vector store.

    :param id: Unique identifier of the deleted vector store
    :param object: Object type identifier for the deletion response
    :param deleted: Whether the deletion operation was successful
    """

    id: str
    object: str = "vector_store.deleted"
    deleted: bool = True


@json_schema_type
class VectorStoreChunkingStrategyAuto(BaseModel):
    """Automatic chunking strategy for vector store files.

    :param type: Strategy type, always "auto" for automatic chunking
    """

    type: Literal["auto"] = "auto"


@json_schema_type
class VectorStoreChunkingStrategyStaticConfig(BaseModel):
    """Configuration for static chunking strategy.

    :param chunk_overlap_tokens: Number of tokens to overlap between adjacent chunks
    :param max_chunk_size_tokens: Maximum number of tokens per chunk, must be between 100 and 4096
    """

    chunk_overlap_tokens: int = 400
    max_chunk_size_tokens: int = Field(800, ge=100, le=4096)


@json_schema_type
class VectorStoreChunkingStrategyStatic(BaseModel):
    """Static chunking strategy with configurable parameters.

    :param type: Strategy type, always "static" for static chunking
    :param static: Configuration parameters for the static chunking strategy
    """

    type: Literal["static"] = "static"
    static: VectorStoreChunkingStrategyStaticConfig


VectorStoreChunkingStrategy = Annotated[
    VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic,
    Field(discriminator="type"),
]
register_schema(VectorStoreChunkingStrategy, name="VectorStoreChunkingStrategy")
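An illustrative selection of the static strategy from this discriminated union; the token sizes are placeholders chosen within the documented 100-4096 bound on chunk size:

strategy: VectorStoreChunkingStrategy = VectorStoreChunkingStrategyStatic(
    static=VectorStoreChunkingStrategyStaticConfig(
        max_chunk_size_tokens=512,
        chunk_overlap_tokens=64,
    )
)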
class SearchRankingOptions(BaseModel):
    """Options for ranking and filtering search results.

    :param ranker: (Optional) Name of the ranking algorithm to use
    :param score_threshold: (Optional) Minimum relevance score threshold for results
    """

    ranker: str | None = None
    # NOTE: OpenAI File Search Tool requires threshold to be between 0 and 1, however
    # we don't guarantee that the score is between 0 and 1, so will leave this unconstrained
    # and let the provider handle it
    score_threshold: float | None = Field(default=0.0)


@json_schema_type
class VectorStoreFileLastError(BaseModel):
    """Error information for failed vector store file processing.

    :param code: Error code indicating the type of failure
    :param message: Human-readable error message describing the failure
    """

    code: Literal["server_error"] | Literal["rate_limit_exceeded"]
    message: str


VectorStoreFileStatus = Literal["completed"] | Literal["in_progress"] | Literal["cancelled"] | Literal["failed"]
register_schema(VectorStoreFileStatus, name="VectorStoreFileStatus")


@json_schema_type
class VectorStoreFileObject(BaseModel):
    """OpenAI Vector Store File object.

    :param id: Unique identifier for the file
    :param object: Object type identifier, always "vector_store.file"
    :param attributes: Key-value attributes associated with the file
    :param chunking_strategy: Strategy used for splitting the file into chunks
    :param created_at: Timestamp when the file was added to the vector store
    :param last_error: (Optional) Error information if file processing failed
    :param status: Current processing status of the file
    :param usage_bytes: Storage space used by this file in bytes
    :param vector_store_id: ID of the vector store containing this file
    """

    id: str
    object: str = "vector_store.file"
    attributes: dict[str, Any] = Field(default_factory=dict)
    chunking_strategy: VectorStoreChunkingStrategy
    created_at: int
    last_error: VectorStoreFileLastError | None = None
    status: VectorStoreFileStatus
    usage_bytes: int = 0
    vector_store_id: str


@json_schema_type
class VectorStoreListFilesResponse(BaseModel):
    """Response from listing files in a vector store.

    :param object: Object type identifier, always "list"
    :param data: List of vector store file objects
    :param first_id: (Optional) ID of the first file in the list for pagination
    :param last_id: (Optional) ID of the last file in the list for pagination
    :param has_more: Whether there are more files available beyond this page
    """

    object: str = "list"
    data: list[VectorStoreFileObject]
    first_id: str | None = None
    last_id: str | None = None
    has_more: bool = False


@json_schema_type
class VectorStoreFileContentsResponse(BaseModel):
    """Response from retrieving the contents of a vector store file.

    :param file_id: Unique identifier for the file
    :param filename: Name of the file
    :param attributes: Key-value attributes associated with the file
    :param content: List of content items from the file
    """

    file_id: str
    filename: str
    attributes: dict[str, Any]
    content: list[VectorStoreContent]


@json_schema_type
class VectorStoreFileDeleteResponse(BaseModel):
    """Response from deleting a vector store file.

    :param id: Unique identifier of the deleted file
    :param object: Object type identifier for the deletion response
    :param deleted: Whether the deletion operation was successful
    """

    id: str
    object: str = "vector_store.file.deleted"
    deleted: bool = True


@json_schema_type
class VectorStoreFileBatchObject(BaseModel):
    """OpenAI Vector Store File Batch object.

    :param id: Unique identifier for the file batch
    :param object: Object type identifier, always "vector_store.file_batch"
    :param created_at: Timestamp when the file batch was created
    :param vector_store_id: ID of the vector store containing the file batch
    :param status: Current processing status of the file batch
    :param file_counts: File processing status counts for the batch
    """

    id: str
    object: str = "vector_store.file_batch"
    created_at: int
    vector_store_id: str
    status: VectorStoreFileStatus
    file_counts: VectorStoreFileCounts


@json_schema_type
class VectorStoreFilesListInBatchResponse(BaseModel):
    """Response from listing files in a vector store file batch.

    :param object: Object type identifier, always "list"
    :param data: List of vector store file objects in the batch
    :param first_id: (Optional) ID of the first file in the list for pagination
    :param last_id: (Optional) ID of the last file in the list for pagination
    :param has_more: Whether there are more files available beyond this page
    """

    object: str = "list"
    data: list[VectorStoreFileObject]
    first_id: str | None = None
    last_id: str | None = None
    has_more: bool = False


# extra_body can be accessed via .model_extra
@json_schema_type
class OpenAICreateVectorStoreRequestWithExtraBody(BaseModel, extra="allow"):
    """Request to create a vector store with extra_body support.

    :param name: (Optional) A name for the vector store
    :param file_ids: List of file IDs to include in the vector store
    :param expires_after: (Optional) Expiration policy for the vector store
    :param chunking_strategy: (Optional) Strategy for splitting files into chunks
    :param metadata: Set of key-value pairs that can be attached to the vector store
    """

    name: str | None = None
    file_ids: list[str] | None = None
    expires_after: dict[str, Any] | None = None
    chunking_strategy: dict[str, Any] | None = None
    metadata: dict[str, Any] | None = None
|
||||
|
||||
# extra_body can be accessed via .model_extra
|
||||
@json_schema_type
|
||||
class OpenAICreateVectorStoreFileBatchRequestWithExtraBody(BaseModel, extra="allow"):
|
||||
"""Request to create a vector store file batch with extra_body support.
|
||||
|
||||
:param file_ids: A list of File IDs that the vector store should use
|
||||
:param attributes: (Optional) Key-value attributes to store with the files
|
||||
:param chunking_strategy: (Optional) The chunking strategy used to chunk the file(s). Defaults to auto
|
||||
"""
|
||||
|
||||
file_ids: list[str]
|
||||
attributes: dict[str, Any] | None = None
|
||||
chunking_strategy: VectorStoreChunkingStrategy | None = None
|
||||
|
||||
|
||||
class VectorStoreTable(Protocol):
|
||||
def get_vector_store(self, vector_store_id: str) -> VectorStore | None: ...
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
@trace_protocol
|
||||
class VectorIO(Protocol):
|
||||
vector_store_table: VectorStoreTable | None = None
|
||||
|
||||
# this will just block now until chunks are inserted, but it should
|
||||
# probably return a Job instance which can be polled for completion
|
||||
# TODO: rename vector_db_id to vector_store_id once Stainless is working
|
||||
@webmethod(route="/vector-io/insert", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def insert_chunks(
|
||||
self,
|
||||
vector_db_id: str,
|
||||
chunks: list[Chunk],
|
||||
ttl_seconds: int | None = None,
|
||||
) -> None:
|
||||
"""Insert chunks into a vector database.
|
||||
|
||||
:param vector_db_id: The identifier of the vector database to insert the chunks into.
|
||||
:param chunks: The chunks to insert. Each `Chunk` should contain content which can be interleaved text, images, or other types.
|
||||
`metadata`: `dict[str, Any]` and `embedding`: `List[float]` are optional.
|
||||
If `metadata` is provided, it configures how Llama Stack formats the chunk during generation.
|
||||
If `embedding` is not provided, it will be computed later.
|
||||
:param ttl_seconds: The time to live of the chunks.
|
||||
"""
|
||||
...
|
||||
|
||||
# TODO: rename vector_db_id to vector_store_id once Stainless is working
|
||||
@webmethod(route="/vector-io/query", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def query_chunks(
|
||||
self,
|
||||
vector_db_id: str,
|
||||
query: InterleavedContent,
|
||||
params: dict[str, Any] | None = None,
|
||||
) -> QueryChunksResponse:
|
||||
"""Query chunks from a vector database.
|
||||
|
||||
:param vector_db_id: The identifier of the vector database to query.
|
||||
:param query: The query to search for.
|
||||
:param params: The parameters of the query.
|
||||
:returns: A QueryChunksResponse.
|
||||
"""
|
||||
...
|
||||
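# Illustrative usage (annotation, not part of this file): a minimal sketch of
# driving a VectorIO implementation. `vector_io_impl`, the metadata key, and the
# `max_chunks` params key are hypothetical; `Chunk` fields follow the docstring
# above (content plus optional metadata/embedding).
#
#     async def _example(vector_io_impl: "VectorIO") -> None:
#         await vector_io_impl.insert_chunks(
#             vector_db_id="my-store",
#             chunks=[Chunk(content="Llama Stack supports vector search.", metadata={"doc_id": "doc-1"})],
#         )
#         response = await vector_io_impl.query_chunks(
#             vector_db_id="my-store",
#             query="What does Llama Stack support?",
#             params={"max_chunks": 3},
#         )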
|
||||
# OpenAI Vector Stores API endpoints
|
||||
@webmethod(route="/openai/v1/vector_stores", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/vector_stores", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def openai_create_vector_store(
|
||||
self,
|
||||
params: Annotated[OpenAICreateVectorStoreRequestWithExtraBody, Body(...)],
|
||||
) -> VectorStoreObject:
|
||||
"""Creates a vector store.
|
||||
|
||||
Generate an OpenAI-compatible vector store with the given parameters.
|
||||
:param params: The vector store creation request (name, file IDs, expiration policy, chunking strategy, metadata).
:returns: A VectorStoreObject representing the created vector store.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/vector_stores", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/vector_stores", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def openai_list_vector_stores(
|
||||
self,
|
||||
limit: int | None = 20,
|
||||
order: str | None = "desc",
|
||||
after: str | None = None,
|
||||
before: str | None = None,
|
||||
) -> VectorStoreListResponse:
|
||||
"""Returns a list of vector stores.
|
||||
|
||||
:param limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.
|
||||
:param order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for descending order.
|
||||
:param after: A cursor for use in pagination. `after` is an object ID that defines your place in the list.
|
||||
:param before: A cursor for use in pagination. `before` is an object ID that defines your place in the list.
|
||||
:returns: A VectorStoreListResponse containing the list of vector stores.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/openai/v1/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
|
||||
)
|
||||
@webmethod(route="/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def openai_retrieve_vector_store(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
) -> VectorStoreObject:
|
||||
"""Retrieves a vector store.
|
||||
|
||||
:param vector_store_id: The ID of the vector store to retrieve.
|
||||
:returns: A VectorStoreObject representing the vector store.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/openai/v1/vector_stores/{vector_store_id}", method="POST", level=LLAMA_STACK_API_V1, deprecated=True
|
||||
)
|
||||
@webmethod(
|
||||
route="/vector_stores/{vector_store_id}",
|
||||
method="POST",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
async def openai_update_vector_store(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
name: str | None = None,
|
||||
expires_after: dict[str, Any] | None = None,
|
||||
metadata: dict[str, Any] | None = None,
|
||||
) -> VectorStoreObject:
|
||||
"""Updates a vector store.
|
||||
|
||||
:param vector_store_id: The ID of the vector store to update.
|
||||
:param name: The name of the vector store.
|
||||
:param expires_after: The expiration policy for a vector store.
|
||||
:param metadata: Set of 16 key-value pairs that can be attached to an object.
|
||||
:returns: A VectorStoreObject representing the updated vector store.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/openai/v1/vector_stores/{vector_store_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True
|
||||
)
|
||||
@webmethod(
|
||||
route="/vector_stores/{vector_store_id}",
|
||||
method="DELETE",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
async def openai_delete_vector_store(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
) -> VectorStoreDeleteResponse:
|
||||
"""Delete a vector store.
|
||||
|
||||
:param vector_store_id: The ID of the vector store to delete.
|
||||
:returns: A VectorStoreDeleteResponse indicating the deletion status.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/openai/v1/vector_stores/{vector_store_id}/search",
|
||||
method="POST",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
deprecated=True,
|
||||
)
|
||||
@webmethod(
|
||||
route="/vector_stores/{vector_store_id}/search",
|
||||
method="POST",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
async def openai_search_vector_store(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
query: str | list[str],
|
||||
filters: dict[str, Any] | None = None,
|
||||
max_num_results: int | None = 10,
|
||||
ranking_options: SearchRankingOptions | None = None,
|
||||
rewrite_query: bool | None = False,
|
||||
search_mode: (
|
||||
str | None
|
||||
) = "vector", # Using str instead of Literal due to OpenAPI schema generator limitations
|
||||
) -> VectorStoreSearchResponsePage:
|
||||
"""Search for chunks in a vector store.
|
||||
|
||||
Searches a vector store for relevant chunks based on a query and optional file attribute filters.
|
||||
|
||||
:param vector_store_id: The ID of the vector store to search.
|
||||
:param query: The query string or array for performing the search.
|
||||
:param filters: Filters based on file attributes to narrow the search results.
|
||||
:param max_num_results: Maximum number of results to return (1 to 50 inclusive, default 10).
|
||||
:param ranking_options: Ranking options for fine-tuning the search results.
|
||||
:param rewrite_query: Whether to rewrite the natural language query for vector search (default false)
|
||||
:param search_mode: The search mode to use - "keyword", "vector", or "hybrid" (default "vector")
|
||||
:returns: A VectorStoreSearchResponsePage containing the search results.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/openai/v1/vector_stores/{vector_store_id}/files",
|
||||
method="POST",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
deprecated=True,
|
||||
)
|
||||
@webmethod(
|
||||
route="/vector_stores/{vector_store_id}/files",
|
||||
method="POST",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
async def openai_attach_file_to_vector_store(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
file_id: str,
|
||||
attributes: dict[str, Any] | None = None,
|
||||
chunking_strategy: VectorStoreChunkingStrategy | None = None,
|
||||
) -> VectorStoreFileObject:
|
||||
"""Attach a file to a vector store.
|
||||
|
||||
:param vector_store_id: The ID of the vector store to attach the file to.
|
||||
:param file_id: The ID of the file to attach to the vector store.
|
||||
:param attributes: The key-value attributes stored with the file, which can be used for filtering.
|
||||
:param chunking_strategy: The chunking strategy to use for the file.
|
||||
:returns: A VectorStoreFileObject representing the attached file.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/openai/v1/vector_stores/{vector_store_id}/files",
|
||||
method="GET",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
deprecated=True,
|
||||
)
|
||||
@webmethod(
|
||||
route="/vector_stores/{vector_store_id}/files",
|
||||
method="GET",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
async def openai_list_files_in_vector_store(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
limit: int | None = 20,
|
||||
order: str | None = "desc",
|
||||
after: str | None = None,
|
||||
before: str | None = None,
|
||||
filter: VectorStoreFileStatus | None = None,
|
||||
) -> VectorStoreListFilesResponse:
|
||||
"""List files in a vector store.
|
||||
|
||||
:param vector_store_id: The ID of the vector store to list files from.
|
||||
:param limit: (Optional) A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.
|
||||
:param order: (Optional) Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for descending order.
|
||||
:param after: (Optional) A cursor for use in pagination. `after` is an object ID that defines your place in the list.
|
||||
:param before: (Optional) A cursor for use in pagination. `before` is an object ID that defines your place in the list.
|
||||
:param filter: (Optional) Filter by file status to only return files with the specified status.
|
||||
:returns: A VectorStoreListFilesResponse containing the list of files.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}",
|
||||
method="GET",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
deprecated=True,
|
||||
)
|
||||
@webmethod(
|
||||
route="/vector_stores/{vector_store_id}/files/{file_id}",
|
||||
method="GET",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
async def openai_retrieve_vector_store_file(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
file_id: str,
|
||||
) -> VectorStoreFileObject:
|
||||
"""Retrieves a vector store file.
|
||||
|
||||
:param vector_store_id: The ID of the vector store containing the file to retrieve.
|
||||
:param file_id: The ID of the file to retrieve.
|
||||
:returns: A VectorStoreFileObject representing the file.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content",
|
||||
method="GET",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
deprecated=True,
|
||||
)
|
||||
@webmethod(
|
||||
route="/vector_stores/{vector_store_id}/files/{file_id}/content",
|
||||
method="GET",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
async def openai_retrieve_vector_store_file_contents(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
file_id: str,
|
||||
) -> VectorStoreFileContentsResponse:
|
||||
"""Retrieves the contents of a vector store file.
|
||||
|
||||
:param vector_store_id: The ID of the vector store containing the file to retrieve.
|
||||
:param file_id: The ID of the file to retrieve.
|
||||
:returns: A VectorStoreFileContentsResponse containing the file contents.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}",
|
||||
method="POST",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
deprecated=True,
|
||||
)
|
||||
@webmethod(
|
||||
route="/vector_stores/{vector_store_id}/files/{file_id}",
|
||||
method="POST",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
async def openai_update_vector_store_file(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
file_id: str,
|
||||
attributes: dict[str, Any],
|
||||
) -> VectorStoreFileObject:
|
||||
"""Updates a vector store file.
|
||||
|
||||
:param vector_store_id: The ID of the vector store containing the file to update.
|
||||
:param file_id: The ID of the file to update.
|
||||
:param attributes: The updated key-value attributes to store with the file.
|
||||
:returns: A VectorStoreFileObject representing the updated file.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}",
|
||||
method="DELETE",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
deprecated=True,
|
||||
)
|
||||
@webmethod(
|
||||
route="/vector_stores/{vector_store_id}/files/{file_id}",
|
||||
method="DELETE",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
async def openai_delete_vector_store_file(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
file_id: str,
|
||||
) -> VectorStoreFileDeleteResponse:
|
||||
"""Delete a vector store file.
|
||||
|
||||
:param vector_store_id: The ID of the vector store containing the file to delete.
|
||||
:param file_id: The ID of the file to delete.
|
||||
:returns: A VectorStoreFileDeleteResponse indicating the deletion status.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/vector_stores/{vector_store_id}/file_batches",
|
||||
method="POST",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
@webmethod(
|
||||
route="/openai/v1/vector_stores/{vector_store_id}/file_batches",
|
||||
method="POST",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
deprecated=True,
|
||||
)
|
||||
async def openai_create_vector_store_file_batch(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
params: Annotated[OpenAICreateVectorStoreFileBatchRequestWithExtraBody, Body(...)],
|
||||
) -> VectorStoreFileBatchObject:
|
||||
"""Create a vector store file batch.
|
||||
|
||||
Generate an OpenAI-compatible vector store file batch for the given vector store.
|
||||
:param vector_store_id: The ID of the vector store to create the file batch for.
:param params: The file IDs, attributes, and chunking strategy for the batch.
|
||||
:returns: A VectorStoreFileBatchObject representing the created file batch.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/vector_stores/{vector_store_id}/file_batches/{batch_id}",
|
||||
method="GET",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
@webmethod(
|
||||
route="/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}",
|
||||
method="GET",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
deprecated=True,
|
||||
)
|
||||
async def openai_retrieve_vector_store_file_batch(
|
||||
self,
|
||||
batch_id: str,
|
||||
vector_store_id: str,
|
||||
) -> VectorStoreFileBatchObject:
|
||||
"""Retrieve a vector store file batch.
|
||||
|
||||
:param batch_id: The ID of the file batch to retrieve.
|
||||
:param vector_store_id: The ID of the vector store containing the file batch.
|
||||
:returns: A VectorStoreFileBatchObject representing the file batch.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files",
|
||||
method="GET",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
deprecated=True,
|
||||
)
|
||||
@webmethod(
|
||||
route="/vector_stores/{vector_store_id}/file_batches/{batch_id}/files",
|
||||
method="GET",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
async def openai_list_files_in_vector_store_file_batch(
|
||||
self,
|
||||
batch_id: str,
|
||||
vector_store_id: str,
|
||||
after: str | None = None,
|
||||
before: str | None = None,
|
||||
filter: str | None = None,
|
||||
limit: int | None = 20,
|
||||
order: str | None = "desc",
|
||||
) -> VectorStoreFilesListInBatchResponse:
|
||||
"""Returns a list of vector store files in a batch.
|
||||
|
||||
:param batch_id: The ID of the file batch to list files from.
|
||||
:param vector_store_id: The ID of the vector store containing the file batch.
|
||||
:param after: A cursor for use in pagination. `after` is an object ID that defines your place in the list.
|
||||
:param before: A cursor for use in pagination. `before` is an object ID that defines your place in the list.
|
||||
:param filter: Filter by file status. One of in_progress, completed, failed, cancelled.
|
||||
:param limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.
|
||||
:param order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for descending order.
|
||||
:returns: A VectorStoreFilesListInBatchResponse containing the list of files in the batch.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel",
|
||||
method="POST",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
deprecated=True,
|
||||
)
|
||||
@webmethod(
|
||||
route="/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel",
|
||||
method="POST",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
async def openai_cancel_vector_store_file_batch(
|
||||
self,
|
||||
batch_id: str,
|
||||
vector_store_id: str,
|
||||
) -> VectorStoreFileBatchObject:
|
||||
"""Cancels a vector store file batch.
|
||||
|
||||
:param batch_id: The ID of the file batch to cancel.
|
||||
:param vector_store_id: The ID of the vector store containing the file batch.
|
||||
:returns: A VectorStoreFileBatchObject representing the cancelled file batch.
|
||||
"""
|
||||
...
|
||||
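For orientation, a hedged sketch of the OpenAI-compatible flow the endpoints above expose, written against the protocol methods and request models defined in this file; the `vector_io` handle, the file ID, and the assumption that the returned VectorStoreObject carries an `id` field are illustrative, not guaranteed by this diff.

async def example_vector_store_flow(vector_io: VectorIO) -> None:
    # Create a store, attach an already-uploaded file, then search it.
    store = await vector_io.openai_create_vector_store(
        OpenAICreateVectorStoreRequestWithExtraBody(name="docs")
    )
    await vector_io.openai_attach_file_to_vector_store(
        vector_store_id=store.id,
        file_id="file-123",  # hypothetical ID of a previously uploaded file
    )
    results = await vector_io.openai_search_vector_store(
        vector_store_id=store.id,
        query="quarterly revenue",
        max_num_results=5,
    )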
7
src/llama_stack/apis/vector_stores/__init__.py
Normal file
@@ -0,0 +1,7 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .vector_stores import *
51
src/llama_stack/apis/vector_stores/vector_stores.py
Normal file
@@ -0,0 +1,51 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import Literal
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.apis.resource import Resource, ResourceType
|
||||
|
||||
|
||||
# Internal resource type for storing the vector store routing and other information
|
||||
class VectorStore(Resource):
|
||||
"""Vector database resource for storing and querying vector embeddings.
|
||||
|
||||
:param type: Type of resource, always 'vector_store' for vector stores
|
||||
:param embedding_model: Name of the embedding model to use for vector generation
|
||||
:param embedding_dimension: Dimension of the embedding vectors
|
||||
"""
|
||||
|
||||
type: Literal[ResourceType.vector_store] = ResourceType.vector_store
|
||||
|
||||
embedding_model: str
|
||||
embedding_dimension: int
|
||||
vector_store_name: str | None = None
|
||||
|
||||
@property
|
||||
def vector_store_id(self) -> str:
|
||||
return self.identifier
|
||||
|
||||
@property
|
||||
def provider_vector_store_id(self) -> str | None:
|
||||
return self.provider_resource_id
|
||||
|
||||
|
||||
class VectorStoreInput(BaseModel):
|
||||
"""Input parameters for creating or configuring a vector database.
|
||||
|
||||
:param vector_store_id: Unique identifier for the vector store
|
||||
:param embedding_model: Name of the embedding model to use for vector generation
|
||||
:param embedding_dimension: Dimension of the embedding vectors
|
||||
:param provider_vector_store_id: (Optional) Provider-specific identifier for the vector store
|
||||
"""
|
||||
|
||||
vector_store_id: str
|
||||
embedding_model: str
|
||||
embedding_dimension: int
|
||||
provider_id: str | None = None
|
||||
provider_vector_store_id: str | None = None
|
||||
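A brief hedged sketch of populating the VectorStoreInput model defined above, e.g. when pre-registering a store in a run configuration; the identifiers, provider id, and embedding model name are placeholders.

example_store = VectorStoreInput(
    vector_store_id="my-docs",
    embedding_model="all-MiniLM-L6-v2",  # placeholder embedding model name
    embedding_dimension=384,
    provider_id="faiss",  # placeholder provider id
)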
9
src/llama_stack/apis/version.py
Normal file
@@ -0,0 +1,9 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

LLAMA_STACK_API_V1 = "v1"
LLAMA_STACK_API_V1BETA = "v1beta"
LLAMA_STACK_API_V1ALPHA = "v1alpha"
5
src/llama_stack/cli/__init__.py
Normal file
@@ -0,0 +1,5 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
56
src/llama_stack/cli/llama.py
Normal file
@@ -0,0 +1,56 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import argparse
|
||||
|
||||
from llama_stack.log import setup_logging
|
||||
|
||||
from .stack import StackParser
|
||||
from .stack.utils import print_subcommand_description
|
||||
|
||||
|
||||
class LlamaCLIParser:
|
||||
"""Defines CLI parser for Llama CLI"""
|
||||
|
||||
def __init__(self):
|
||||
self.parser = argparse.ArgumentParser(
|
||||
prog="llama",
|
||||
description="Welcome to the Llama CLI",
|
||||
add_help=True,
|
||||
formatter_class=argparse.RawTextHelpFormatter,
|
||||
)
|
||||
|
||||
# Default command is to print help
|
||||
self.parser.set_defaults(func=lambda args: self.parser.print_help())
|
||||
|
||||
subparsers = self.parser.add_subparsers(title="subcommands")
|
||||
|
||||
# Add sub-commands
|
||||
StackParser.create(subparsers)
|
||||
|
||||
print_subcommand_description(self.parser, subparsers)
|
||||
|
||||
def parse_args(self) -> argparse.Namespace:
|
||||
args = self.parser.parse_args()
|
||||
if not isinstance(args, argparse.Namespace):
|
||||
raise TypeError(f"Expected argparse.Namespace, got {type(args)}")
|
||||
return args
|
||||
|
||||
def run(self, args: argparse.Namespace) -> None:
|
||||
args.func(args)
|
||||
|
||||
|
||||
def main():
|
||||
# Initialize logging from environment variables before any other operations
|
||||
setup_logging()
|
||||
|
||||
parser = LlamaCLIParser()
|
||||
args = parser.parse_args()
|
||||
parser.run(args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
5
src/llama_stack/cli/scripts/__init__.py
Normal file
@@ -0,0 +1,5 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
38
src/llama_stack/cli/scripts/install-wheel-from-presigned.sh
Executable file
@@ -0,0 +1,38 @@
#!/bin/bash

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

set -euo pipefail

if [ $# -eq 0 ]; then
    echo "Please provide a URL as an argument."
    exit 1
fi

URL=$1

HEADERS_FILE=$(mktemp)
curl -s -I "$URL" >"$HEADERS_FILE"
FILENAME=$(grep -i "x-manifold-obj-canonicalpath:" "$HEADERS_FILE" | sed -E 's/.*nodes\/[^\/]+\/(.+)/\1/' | tr -d "\r\n")

if [ -z "$FILENAME" ]; then
    echo "Could not find the x-manifold-obj-canonicalpath header."
    echo "HEADERS_FILE contents: "
    cat "$HEADERS_FILE"
    echo ""
    exit 1
fi

echo "Downloading $FILENAME..."

curl -s -L -o "$FILENAME" "$URL"

echo "Installing $FILENAME..."
pip install "$FILENAME"
echo "Successfully installed $FILENAME"

rm -f "$FILENAME"
18
src/llama_stack/cli/scripts/run.py
Normal file
@@ -0,0 +1,18 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import os
import subprocess
import sys


def install_wheel_from_presigned():
    file = "install-wheel-from-presigned.sh"
    script_path = os.path.join(os.path.dirname(__file__), file)
    try:
        subprocess.run(["sh", script_path] + sys.argv[1:], check=True)
    except Exception:
        sys.exit(1)
7
src/llama_stack/cli/stack/__init__.py
Normal file
@@ -0,0 +1,7 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .stack import StackParser  # noqa
182
src/llama_stack/cli/stack/_list_deps.py
Normal file
@@ -0,0 +1,182 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
from termcolor import cprint
|
||||
|
||||
from llama_stack.cli.stack.utils import ImageType
|
||||
from llama_stack.core.build import get_provider_dependencies
|
||||
from llama_stack.core.datatypes import (
|
||||
BuildConfig,
|
||||
BuildProvider,
|
||||
DistributionSpec,
|
||||
)
|
||||
from llama_stack.core.distribution import get_provider_registry
|
||||
from llama_stack.core.stack import replace_env_vars
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.datatypes import Api
|
||||
|
||||
TEMPLATES_PATH = Path(__file__).parent.parent.parent / "templates"
|
||||
|
||||
logger = get_logger(name=__name__, category="cli")
|
||||
|
||||
|
||||
# These are the dependencies needed by the distribution server.
|
||||
# `llama-stack` is automatically installed by the installation script.
|
||||
SERVER_DEPENDENCIES = [
|
||||
"aiosqlite",
|
||||
"fastapi",
|
||||
"fire",
|
||||
"httpx",
|
||||
"uvicorn",
|
||||
"opentelemetry-sdk",
|
||||
"opentelemetry-exporter-otlp-proto-http",
|
||||
]
|
||||
|
||||
|
||||
def format_output_deps_only(
|
||||
normal_deps: list[str],
|
||||
special_deps: list[str],
|
||||
external_deps: list[str],
|
||||
uv: bool = False,
|
||||
) -> str:
|
||||
"""Format dependencies as a list."""
|
||||
lines = []
|
||||
|
||||
uv_str = ""
|
||||
if uv:
|
||||
uv_str = "uv pip install "
|
||||
|
||||
# Quote deps with commas
|
||||
quoted_normal_deps = [quote_if_needed(dep) for dep in normal_deps]
|
||||
lines.append(f"{uv_str}{' '.join(quoted_normal_deps)}")
|
||||
|
||||
for special_dep in special_deps:
|
||||
lines.append(f"{uv_str}{quote_special_dep(special_dep)}")
|
||||
|
||||
for external_dep in external_deps:
|
||||
lines.append(f"{uv_str}{quote_special_dep(external_dep)}")
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def run_stack_list_deps_command(args: argparse.Namespace) -> None:
|
||||
if args.config:
|
||||
try:
|
||||
from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
|
||||
|
||||
config_file = resolve_config_or_distro(args.config, Mode.BUILD)
|
||||
except ValueError as e:
|
||||
cprint(
|
||||
f"Could not parse config file {args.config}: {e}",
|
||||
color="red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
if config_file:
|
||||
with open(config_file) as f:
|
||||
try:
|
||||
contents = yaml.safe_load(f)
|
||||
contents = replace_env_vars(contents)
|
||||
build_config = BuildConfig(**contents)
|
||||
build_config.image_type = "venv"
|
||||
except Exception as e:
|
||||
cprint(
|
||||
f"Could not parse config file {config_file}: {e}",
|
||||
color="red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
elif args.providers:
|
||||
provider_list: dict[str, list[BuildProvider]] = dict()
|
||||
for api_provider in args.providers.split(","):
|
||||
if "=" not in api_provider:
|
||||
cprint(
|
||||
"Could not parse `--providers`. Please ensure the list is in the format api1=provider1,api2=provider2",
|
||||
color="red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
api, provider_type = api_provider.split("=")
|
||||
providers_for_api = get_provider_registry().get(Api(api), None)
|
||||
if providers_for_api is None:
|
||||
cprint(
|
||||
f"{api} is not a valid API.",
|
||||
color="red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
if provider_type in providers_for_api:
|
||||
provider = BuildProvider(
|
||||
provider_type=provider_type,
|
||||
module=None,
|
||||
)
|
||||
provider_list.setdefault(api, []).append(provider)
|
||||
else:
|
||||
cprint(
|
||||
f"{provider_type} is not a valid provider for the {api} API.",
|
||||
color="red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
distribution_spec = DistributionSpec(
|
||||
providers=provider_list,
|
||||
description=",".join(args.providers),
|
||||
)
|
||||
build_config = BuildConfig(image_type=ImageType.VENV.value, distribution_spec=distribution_spec)
|
||||
|
||||
normal_deps, special_deps, external_provider_dependencies = get_provider_dependencies(build_config)
|
||||
normal_deps += SERVER_DEPENDENCIES
|
||||
|
||||
# Add external API dependencies
|
||||
if build_config.external_apis_dir:
|
||||
from llama_stack.core.external import load_external_apis
|
||||
|
||||
external_apis = load_external_apis(build_config)
|
||||
if external_apis:
|
||||
for _, api_spec in external_apis.items():
|
||||
normal_deps.extend(api_spec.pip_packages)
|
||||
|
||||
# Format and output based on requested format
|
||||
output = format_output_deps_only(
|
||||
normal_deps=normal_deps,
|
||||
special_deps=special_deps,
|
||||
external_deps=external_provider_dependencies,
|
||||
uv=args.format == "uv",
|
||||
)
|
||||
|
||||
print(output)
|
||||
|
||||
|
||||
def quote_if_needed(dep):
|
||||
# Add quotes if the dependency contains special characters that need escaping in shell
|
||||
# This includes: commas, comparison operators (<, >, <=, >=, ==, !=)
|
||||
needs_quoting = any(char in dep for char in [",", "<", ">", "="])
|
||||
return f"'{dep}'" if needs_quoting else dep
|
||||
|
||||
|
||||
def quote_special_dep(dep_string):
|
||||
"""
|
||||
Quote individual packages in a special dependency string.
|
||||
Special deps may contain multiple packages and flags like --extra-index-url.
|
||||
We need to quote only the package specs that contain special characters.
|
||||
"""
|
||||
parts = dep_string.split()
|
||||
quoted_parts = []
|
||||
|
||||
for part in parts:
|
||||
# Don't quote flags (they start with -)
|
||||
if part.startswith("-"):
|
||||
quoted_parts.append(part)
|
||||
else:
|
||||
# Quote package specs that need it
|
||||
quoted_parts.append(quote_if_needed(part))
|
||||
|
||||
return " ".join(quoted_parts)
|
||||
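To make the quoting rules above concrete, a small hedged example of what the helpers produce for typical dependency strings (outputs derived from the logic shown, not from running the CLI; the index URL is a placeholder):

print(quote_if_needed("fastapi"))        # fastapi
print(quote_if_needed("torch>=2.1,<3"))  # 'torch>=2.1,<3'
print(quote_special_dep("--extra-index-url https://example.com/simple torch==2.1"))
# --extra-index-url https://example.com/simple 'torch==2.1'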
47
src/llama_stack/cli/stack/list_apis.py
Normal file
@@ -0,0 +1,47 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import argparse
|
||||
|
||||
from llama_stack.cli.subcommand import Subcommand
|
||||
|
||||
|
||||
class StackListApis(Subcommand):
|
||||
def __init__(self, subparsers: argparse._SubParsersAction):
|
||||
super().__init__()
|
||||
self.parser = subparsers.add_parser(
|
||||
"list-apis",
|
||||
prog="llama stack list-apis",
|
||||
description="List APIs part of the Llama Stack implementation",
|
||||
formatter_class=argparse.RawTextHelpFormatter,
|
||||
)
|
||||
self._add_arguments()
|
||||
self.parser.set_defaults(func=self._run_apis_list_cmd)
|
||||
|
||||
def _add_arguments(self):
|
||||
pass
|
||||
|
||||
def _run_apis_list_cmd(self, args: argparse.Namespace) -> None:
|
||||
from llama_stack.cli.table import print_table
|
||||
from llama_stack.core.distribution import stack_apis
|
||||
|
||||
# eventually, this should query a registry at llama.meta.com/llamastack/distributions
|
||||
headers = [
|
||||
"API",
|
||||
]
|
||||
|
||||
rows = []
|
||||
for api in stack_apis():
|
||||
rows.append(
|
||||
[
|
||||
api.value,
|
||||
]
|
||||
)
|
||||
print_table(
|
||||
rows,
|
||||
headers,
|
||||
separate_rows=True,
|
||||
)
|
||||
51
src/llama_stack/cli/stack/list_deps.py
Normal file
@@ -0,0 +1,51 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
import argparse
|
||||
|
||||
from llama_stack.cli.subcommand import Subcommand
|
||||
|
||||
|
||||
class StackListDeps(Subcommand):
|
||||
def __init__(self, subparsers: argparse._SubParsersAction):
|
||||
super().__init__()
|
||||
self.parser = subparsers.add_parser(
|
||||
"list-deps",
|
||||
prog="llama stack list-deps",
|
||||
description="list the dependencies for a llama stack distribution",
|
||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
||||
)
|
||||
self._add_arguments()
|
||||
self.parser.set_defaults(func=self._run_stack_list_deps_command)
|
||||
|
||||
def _add_arguments(self):
|
||||
self.parser.add_argument(
|
||||
"config",
|
||||
type=str,
|
||||
nargs="?", # Make it optional
|
||||
metavar="config | distro",
|
||||
help="Path to config file to use or name of known distro (llama stack list for a list).",
|
||||
)
|
||||
|
||||
self.parser.add_argument(
|
||||
"--providers",
|
||||
type=str,
|
||||
default=None,
|
||||
help="sync dependencies for a list of providers and only those providers. This list is formatted like: api1=provider1,api2=provider2. Where there can be multiple providers per API.",
|
||||
)
|
||||
self.parser.add_argument(
|
||||
"--format",
|
||||
type=str,
|
||||
choices=["uv", "deps-only"],
|
||||
default="deps-only",
|
||||
help="Output format: 'uv' shows shell commands, 'deps-only' shows just the list of dependencies without `uv` (default)",
|
||||
)
|
||||
|
||||
def _run_stack_list_deps_command(self, args: argparse.Namespace) -> None:
|
||||
# always keep implementation completely silo-ed away from CLI so CLI
|
||||
# can be fast to load and reduces dependencies
|
||||
from ._list_deps import run_stack_list_deps_command
|
||||
|
||||
return run_stack_list_deps_command(args)
|
||||
76
src/llama_stack/cli/stack/list_providers.py
Normal file
@@ -0,0 +1,76 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import argparse
|
||||
|
||||
from llama_stack.cli.subcommand import Subcommand
|
||||
|
||||
|
||||
class StackListProviders(Subcommand):
|
||||
def __init__(self, subparsers: argparse._SubParsersAction):
|
||||
super().__init__()
|
||||
self.parser = subparsers.add_parser(
|
||||
"list-providers",
|
||||
prog="llama stack list-providers",
|
||||
description="Show available Llama Stack Providers for an API",
|
||||
formatter_class=argparse.RawTextHelpFormatter,
|
||||
)
|
||||
self._add_arguments()
|
||||
self.parser.set_defaults(func=self._run_providers_list_cmd)
|
||||
|
||||
@property
|
||||
def providable_apis(self):
|
||||
from llama_stack.core.distribution import providable_apis
|
||||
|
||||
return [api.value for api in providable_apis()]
|
||||
|
||||
def _add_arguments(self):
|
||||
self.parser.add_argument(
|
||||
"api",
|
||||
type=str,
|
||||
choices=self.providable_apis,
|
||||
nargs="?",
|
||||
help="API to list providers for. List all if not specified.",
|
||||
)
|
||||
|
||||
def _run_providers_list_cmd(self, args: argparse.Namespace) -> None:
|
||||
from llama_stack.cli.table import print_table
|
||||
from llama_stack.core.distribution import Api, get_provider_registry
|
||||
|
||||
all_providers = get_provider_registry()
|
||||
if args.api:
|
||||
providers = [(args.api, all_providers[Api(args.api)])]
|
||||
else:
|
||||
providers = [(k.value, prov) for k, prov in all_providers.items()]
|
||||
|
||||
providers = [(api, p) for api, p in providers if api in self.providable_apis]
|
||||
|
||||
# eventually, this should query a registry at llama.meta.com/llamastack/distributions
|
||||
headers = [
|
||||
"API Type",
|
||||
"Provider Type",
|
||||
"PIP Package Dependencies",
|
||||
]
|
||||
|
||||
rows = []
|
||||
|
||||
specs = [spec for api, p in providers for spec in p.values()]
|
||||
for spec in specs:
|
||||
if spec.is_sample:
|
||||
continue
|
||||
rows.append(
|
||||
[
|
||||
spec.api.value,
|
||||
spec.provider_type,
|
||||
",".join(spec.pip_packages) if hasattr(spec, "pip_packages") else "",
|
||||
]
|
||||
)
|
||||
print_table(
|
||||
rows,
|
||||
headers,
|
||||
separate_rows=True,
|
||||
sort_by=(0, 1),
|
||||
)
|
||||
56
src/llama_stack/cli/stack/list_stacks.py
Normal file
@@ -0,0 +1,56 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
from llama_stack.cli.subcommand import Subcommand
|
||||
from llama_stack.cli.table import print_table
|
||||
|
||||
|
||||
class StackListBuilds(Subcommand):
|
||||
"""List built stacks in .llama/distributions directory"""
|
||||
|
||||
def __init__(self, subparsers: argparse._SubParsersAction):
|
||||
super().__init__()
|
||||
self.parser = subparsers.add_parser(
|
||||
"list",
|
||||
prog="llama stack list",
|
||||
description="list the build stacks",
|
||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
||||
)
|
||||
self._add_arguments()
|
||||
self.parser.set_defaults(func=self._list_stack_command)
|
||||
|
||||
def _get_distribution_dirs(self) -> dict[str, Path]:
|
||||
"""Return a dictionary of distribution names and their paths"""
|
||||
distributions = {}
|
||||
dist_dir = Path.home() / ".llama" / "distributions"
|
||||
|
||||
if dist_dir.exists():
|
||||
for stack_dir in dist_dir.iterdir():
|
||||
if stack_dir.is_dir():
|
||||
distributions[stack_dir.name] = stack_dir
|
||||
return distributions
|
||||
|
||||
def _list_stack_command(self, args: argparse.Namespace) -> None:
|
||||
distributions = self._get_distribution_dirs()
|
||||
|
||||
if not distributions:
|
||||
print("No stacks found in ~/.llama/distributions")
|
||||
return
|
||||
|
||||
headers = ["Stack Name", "Path"]
|
||||
headers.extend(["Build Config", "Run Config"])
|
||||
rows = []
|
||||
for name, path in distributions.items():
|
||||
row = [name, str(path)]
|
||||
# Check for build and run config files
|
||||
build_config = "Yes" if (path / f"{name}-build.yaml").exists() else "No"
|
||||
run_config = "Yes" if (path / f"{name}-run.yaml").exists() else "No"
|
||||
row.extend([build_config, run_config])
|
||||
rows.append(row)
|
||||
print_table(rows, headers, separate_rows=True)
|
||||
115
src/llama_stack/cli/stack/remove.py
Normal file
@@ -0,0 +1,115 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import argparse
|
||||
import shutil
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from termcolor import cprint
|
||||
|
||||
from llama_stack.cli.subcommand import Subcommand
|
||||
from llama_stack.cli.table import print_table
|
||||
|
||||
|
||||
class StackRemove(Subcommand):
|
||||
"""Remove the build stack"""
|
||||
|
||||
def __init__(self, subparsers: argparse._SubParsersAction):
|
||||
super().__init__()
|
||||
self.parser = subparsers.add_parser(
|
||||
"rm",
|
||||
prog="llama stack rm",
|
||||
description="Remove the build stack",
|
||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
||||
)
|
||||
self._add_arguments()
|
||||
self.parser.set_defaults(func=self._remove_stack_build_command)
|
||||
|
||||
def _add_arguments(self) -> None:
|
||||
self.parser.add_argument(
|
||||
"name",
|
||||
type=str,
|
||||
nargs="?",
|
||||
help="Name of the stack to delete",
|
||||
)
|
||||
self.parser.add_argument(
|
||||
"--all",
|
||||
"-a",
|
||||
action="store_true",
|
||||
help="Delete all stacks (use with caution)",
|
||||
)
|
||||
|
||||
def _get_distribution_dirs(self) -> dict[str, Path]:
|
||||
"""Return a dictionary of distribution names and their paths"""
|
||||
distributions = {}
|
||||
dist_dir = Path.home() / ".llama" / "distributions"
|
||||
|
||||
if dist_dir.exists():
|
||||
for stack_dir in dist_dir.iterdir():
|
||||
if stack_dir.is_dir():
|
||||
distributions[stack_dir.name] = stack_dir
|
||||
return distributions
|
||||
|
||||
def _list_stacks(self) -> None:
|
||||
"""Display available stacks in a table"""
|
||||
distributions = self._get_distribution_dirs()
|
||||
if not distributions:
|
||||
cprint("No stacks found in ~/.llama/distributions", color="red", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
headers = ["Stack Name", "Path"]
|
||||
rows = [[name, str(path)] for name, path in distributions.items()]
|
||||
print_table(rows, headers, separate_rows=True)
|
||||
|
||||
def _remove_stack_build_command(self, args: argparse.Namespace) -> None:
|
||||
distributions = self._get_distribution_dirs()
|
||||
|
||||
if args.all:
|
||||
confirm = input("Are you sure you want to delete ALL stacks? [yes-i-really-want/N] ").lower()
|
||||
if confirm != "yes-i-really-want":
|
||||
cprint("Deletion cancelled.", color="green", file=sys.stderr)
|
||||
return
|
||||
|
||||
for name, path in distributions.items():
|
||||
try:
|
||||
shutil.rmtree(path)
|
||||
cprint(f"Deleted stack: {name}", color="green", file=sys.stderr)
|
||||
except Exception as e:
|
||||
cprint(
|
||||
f"Failed to delete stack {name}: {e}",
|
||||
color="red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
if not args.name:
|
||||
self._list_stacks()
|
||||
if not args.name:
|
||||
return
|
||||
|
||||
if args.name not in distributions:
|
||||
self._list_stacks()
|
||||
cprint(
|
||||
f"Stack not found: {args.name}",
|
||||
color="red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
stack_path = distributions[args.name]
|
||||
|
||||
confirm = input(f"Are you sure you want to delete stack '{args.name}'? [y/N] ").lower()
|
||||
if confirm != "y":
|
||||
cprint("Deletion cancelled.", color="green", file=sys.stderr)
|
||||
return
|
||||
|
||||
try:
|
||||
shutil.rmtree(stack_path)
|
||||
cprint(f"Successfully deleted stack: {args.name}", color="green", file=sys.stderr)
|
||||
except Exception as e:
|
||||
cprint(f"Failed to delete stack {args.name}: {e}", color="red", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
214
src/llama_stack/cli/stack/run.py
Normal file
@@ -0,0 +1,214 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import ssl
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
import uvicorn
|
||||
import yaml
|
||||
|
||||
from llama_stack.cli.stack.utils import ImageType
|
||||
from llama_stack.cli.subcommand import Subcommand
|
||||
from llama_stack.core.datatypes import StackRunConfig
|
||||
from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars
|
||||
from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
|
||||
from llama_stack.log import LoggingConfig, get_logger
|
||||
|
||||
REPO_ROOT = Path(__file__).parent.parent.parent.parent
|
||||
|
||||
logger = get_logger(name=__name__, category="cli")
|
||||
|
||||
|
||||
class StackRun(Subcommand):
|
||||
def __init__(self, subparsers: argparse._SubParsersAction):
|
||||
super().__init__()
|
||||
self.parser = subparsers.add_parser(
|
||||
"run",
|
||||
prog="llama stack run",
|
||||
description="""Start the server for a Llama Stack Distribution. You should have already built (or downloaded) and configured the distribution.""",
|
||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
||||
)
|
||||
self._add_arguments()
|
||||
self.parser.set_defaults(func=self._run_stack_run_cmd)
|
||||
|
||||
def _add_arguments(self):
|
||||
self.parser.add_argument(
|
||||
"config",
|
||||
type=str,
|
||||
nargs="?", # Make it optional
|
||||
metavar="config | distro",
|
||||
help="Path to config file to use for the run or name of known distro (`llama stack list` for a list).",
|
||||
)
|
||||
self.parser.add_argument(
|
||||
"--port",
|
||||
type=int,
|
||||
help="Port to run the server on. It can also be passed via the env var LLAMA_STACK_PORT.",
|
||||
default=int(os.getenv("LLAMA_STACK_PORT", 8321)),
|
||||
)
|
||||
self.parser.add_argument(
|
||||
"--image-name",
|
||||
type=str,
|
||||
default=None,
|
||||
help="[DEPRECATED] This flag is no longer supported. Please activate your virtual environment before running.",
|
||||
)
|
||||
self.parser.add_argument(
|
||||
"--image-type",
|
||||
type=str,
|
||||
help="[DEPRECATED] This flag is no longer supported. Please activate your virtual environment before running.",
|
||||
choices=[e.value for e in ImageType if e.value != ImageType.CONTAINER.value],
|
||||
)
|
||||
self.parser.add_argument(
|
||||
"--enable-ui",
|
||||
action="store_true",
|
||||
help="Start the UI server",
|
||||
)
|
||||
|
||||
def _run_stack_run_cmd(self, args: argparse.Namespace) -> None:
|
||||
import yaml
|
||||
|
||||
from llama_stack.core.configure import parse_and_maybe_upgrade_config
|
||||
|
||||
if args.image_type or args.image_name:
|
||||
self.parser.error(
|
||||
"The --image-type and --image-name flags are no longer supported.\n\n"
|
||||
"Please activate your virtual environment manually before running `llama stack run`.\n\n"
|
||||
"For example:\n"
|
||||
" source /path/to/venv/bin/activate\n"
|
||||
" llama stack run <config>\n"
|
||||
)
|
||||
|
||||
if args.enable_ui:
|
||||
self._start_ui_development_server(args.port)
|
||||
|
||||
if args.config:
|
||||
try:
|
||||
from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
|
||||
|
||||
config_file = resolve_config_or_distro(args.config, Mode.RUN)
|
||||
except ValueError as e:
|
||||
self.parser.error(str(e))
|
||||
else:
|
||||
config_file = None
|
||||
|
||||
if config_file:
|
||||
logger.info(f"Using run configuration: {config_file}")
|
||||
|
||||
try:
|
||||
config_dict = yaml.safe_load(config_file.read_text())
|
||||
except yaml.parser.ParserError as e:
|
||||
self.parser.error(f"failed to load config file '{config_file}':\n {e}")
|
||||
|
||||
try:
|
||||
config = parse_and_maybe_upgrade_config(config_dict)
|
||||
if not os.path.exists(str(config.external_providers_dir)):
|
||||
os.makedirs(str(config.external_providers_dir), exist_ok=True)
|
||||
except AttributeError as e:
|
||||
self.parser.error(f"failed to parse config file '{config_file}':\n {e}")
|
||||
|
||||
self._uvicorn_run(config_file, args)
|
||||
|
||||
def _uvicorn_run(self, config_file: Path | None, args: argparse.Namespace) -> None:
|
||||
if not config_file:
|
||||
self.parser.error("Config file is required")
|
||||
|
||||
config_file = resolve_config_or_distro(str(config_file), Mode.RUN)
|
||||
with open(config_file) as fp:
|
||||
config_contents = yaml.safe_load(fp)
|
||||
if isinstance(config_contents, dict) and (cfg := config_contents.get("logging_config")):
|
||||
logger_config = LoggingConfig(**cfg)
|
||||
else:
|
||||
logger_config = None
|
||||
config = StackRunConfig(**cast_image_name_to_string(replace_env_vars(config_contents)))
|
||||
|
||||
port = args.port or config.server.port
|
||||
host = config.server.host or ["::", "0.0.0.0"]
|
||||
|
||||
# Set the config file in environment so create_app can find it
|
||||
os.environ["LLAMA_STACK_CONFIG"] = str(config_file)
|
||||
|
||||
uvicorn_config = {
|
||||
"factory": True,
|
||||
"host": host,
|
||||
"port": port,
|
||||
"lifespan": "on",
|
||||
"log_level": logger.getEffectiveLevel(),
|
||||
"log_config": logger_config,
|
||||
}
|
||||
|
||||
keyfile = config.server.tls_keyfile
|
||||
certfile = config.server.tls_certfile
|
||||
if keyfile and certfile:
|
||||
uvicorn_config["ssl_keyfile"] = config.server.tls_keyfile
|
||||
uvicorn_config["ssl_certfile"] = config.server.tls_certfile
|
||||
if config.server.tls_cafile:
|
||||
uvicorn_config["ssl_ca_certs"] = config.server.tls_cafile
|
||||
uvicorn_config["ssl_cert_reqs"] = ssl.CERT_REQUIRED
|
||||
|
||||
logger.info(
|
||||
f"HTTPS enabled with certificates:\n Key: {keyfile}\n Cert: {certfile}\n CA: {config.server.tls_cafile}"
|
||||
)
|
||||
else:
|
||||
logger.info(f"HTTPS enabled with certificates:\n Key: {keyfile}\n Cert: {certfile}")
|
||||
|
||||
logger.info(f"Listening on {host}:{port}")
|
||||
|
||||
# We need to catch KeyboardInterrupt because uvicorn's signal handling
|
||||
# re-raises SIGINT signals using signal.raise_signal(), which Python
|
||||
# converts to KeyboardInterrupt. Without this catch, we'd get a confusing
|
||||
# stack trace when using Ctrl+C or kill -2 (SIGINT).
|
||||
# SIGTERM (kill -15) works fine without this because Python doesn't
|
||||
# have a default handler for it.
|
||||
#
|
||||
# Another approach would be to ignore SIGINT entirely - let uvicorn handle it through its own
|
||||
# signal handling but this is quite intrusive and not worth the effort.
|
||||
try:
|
||||
uvicorn.run("llama_stack.core.server.server:create_app", **uvicorn_config)
|
||||
except (KeyboardInterrupt, SystemExit):
|
||||
logger.info("Received interrupt signal, shutting down gracefully...")
|
||||
|
||||
def _start_ui_development_server(self, stack_server_port: int):
|
||||
logger.info("Attempting to start UI development server...")
|
||||
# Check if npm is available
|
||||
npm_check = subprocess.run(["npm", "--version"], capture_output=True, text=True, check=False)
|
||||
if npm_check.returncode != 0:
|
||||
logger.warning(
|
||||
f"'npm' command not found or not executable. UI development server will not be started. Error: {npm_check.stderr}"
|
||||
)
|
||||
return
|
||||
|
||||
ui_dir = REPO_ROOT / "llama_stack" / "ui"
|
||||
logs_dir = Path("~/.llama/ui/logs").expanduser()
|
||||
try:
|
||||
# Create logs directory if it doesn't exist
|
||||
logs_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
ui_stdout_log_path = logs_dir / "stdout.log"
|
||||
ui_stderr_log_path = logs_dir / "stderr.log"
|
||||
|
||||
# Open log files in append mode
|
||||
stdout_log_file = open(ui_stdout_log_path, "a")
|
||||
stderr_log_file = open(ui_stderr_log_path, "a")
|
||||
|
||||
process = subprocess.Popen(
|
||||
["npm", "run", "dev"],
|
||||
cwd=str(ui_dir),
|
||||
stdout=stdout_log_file,
|
||||
stderr=stderr_log_file,
|
||||
env={**os.environ, "NEXT_PUBLIC_LLAMA_STACK_BASE_URL": f"http://localhost:{stack_server_port}"},
|
||||
)
|
||||
logger.info(f"UI development server process started in {ui_dir} with PID {process.pid}.")
|
||||
logger.info(f"Logs: stdout -> {ui_stdout_log_path}, stderr -> {ui_stderr_log_path}")
|
||||
logger.info(f"UI will be available at http://localhost:{os.getenv('LLAMA_STACK_UI_PORT', 8322)}")
|
||||
|
||||
except FileNotFoundError:
|
||||
logger.error(
|
||||
"Failed to start UI development server: 'npm' command not found. Make sure npm is installed and in your PATH."
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to start UI development server in {ui_dir}: {e}")
|
||||
48
src/llama_stack/cli/stack/stack.py
Normal file
@@ -0,0 +1,48 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import argparse
|
||||
from importlib.metadata import version
|
||||
|
||||
from llama_stack.cli.stack.list_stacks import StackListBuilds
|
||||
from llama_stack.cli.stack.utils import print_subcommand_description
|
||||
from llama_stack.cli.subcommand import Subcommand
|
||||
|
||||
from .list_apis import StackListApis
|
||||
from .list_deps import StackListDeps
|
||||
from .list_providers import StackListProviders
|
||||
from .remove import StackRemove
|
||||
from .run import StackRun
|
||||
|
||||
|
||||
class StackParser(Subcommand):
|
||||
def __init__(self, subparsers: argparse._SubParsersAction):
|
||||
super().__init__()
|
||||
self.parser = subparsers.add_parser(
|
||||
"stack",
|
||||
prog="llama stack",
|
||||
description="Operations for the Llama Stack / Distributions",
|
||||
formatter_class=argparse.RawTextHelpFormatter,
|
||||
)
|
||||
|
||||
self.parser.add_argument(
|
||||
"--version",
|
||||
action="version",
|
||||
version=f"{version('llama-stack')}",
|
||||
)
|
||||
|
||||
self.parser.set_defaults(func=lambda args: self.parser.print_help())
|
||||
|
||||
subparsers = self.parser.add_subparsers(title="stack_subcommands")
|
||||
|
||||
# Add sub-commands
|
||||
StackListDeps.create(subparsers)
|
||||
StackListApis.create(subparsers)
|
||||
StackListProviders.create(subparsers)
|
||||
StackRun.create(subparsers)
|
||||
StackRemove.create(subparsers)
|
||||
StackListBuilds.create(subparsers)
|
||||
print_subcommand_description(self.parser, subparsers)
|
||||
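A rough sketch of how a subcommand class like this is typically wired into a top-level CLI entry point; the `llama` program name and the argv below are illustrative, not taken from this diff:

import argparse

from llama_stack.cli.stack.stack import StackParser

# Hypothetical top-level entry point registering the `stack` subcommand.
parser = argparse.ArgumentParser(prog="llama")
subparsers = parser.add_subparsers(title="subcommands")
StackParser(subparsers)  # adds `llama stack ...` plus its own nested subcommands

args = parser.parse_args(["stack"])
if hasattr(args, "func"):
    args.func(args)  # with no stack subcommand given, this prints the stack help text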
151
src/llama_stack/cli/stack/utils.py
Normal file
@@ -0,0 +1,151 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import json
|
||||
import sys
|
||||
from enum import Enum
|
||||
from functools import lru_cache
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
from termcolor import cprint
|
||||
|
||||
from llama_stack.core.datatypes import (
|
||||
BuildConfig,
|
||||
Provider,
|
||||
StackRunConfig,
|
||||
StorageConfig,
|
||||
)
|
||||
from llama_stack.core.distribution import get_provider_registry
|
||||
from llama_stack.core.resolver import InvalidProviderError
|
||||
from llama_stack.core.storage.datatypes import (
|
||||
InferenceStoreReference,
|
||||
KVStoreReference,
|
||||
ServerStoresConfig,
|
||||
SqliteKVStoreConfig,
|
||||
SqliteSqlStoreConfig,
|
||||
SqlStoreReference,
|
||||
)
|
||||
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR
|
||||
from llama_stack.core.utils.dynamic import instantiate_class_type
|
||||
from llama_stack.core.utils.image_types import LlamaStackImageType
|
||||
from llama_stack.providers.datatypes import Api
|
||||
|
||||
TEMPLATES_PATH = Path(__file__).parent.parent.parent / "distributions"
|
||||
|
||||
|
||||
class ImageType(Enum):
|
||||
CONTAINER = "container"
|
||||
VENV = "venv"
|
||||
|
||||
|
||||
def print_subcommand_description(parser, subparsers):
|
||||
"""Print descriptions of subcommands."""
|
||||
description_text = ""
|
||||
for name, subcommand in subparsers.choices.items():
|
||||
description = subcommand.description
|
||||
description_text += f" {name:<21} {description}\n"
|
||||
parser.epilog = description_text
|
||||
|
||||
|
||||
def generate_run_config(
|
||||
build_config: BuildConfig,
|
||||
build_dir: Path,
|
||||
image_name: str,
|
||||
) -> Path:
|
||||
"""
|
||||
Generate a run.yaml template file for user to edit from a build.yaml file
|
||||
"""
|
||||
apis = list(build_config.distribution_spec.providers.keys())
|
||||
distro_dir = DISTRIBS_BASE_DIR / image_name
|
||||
run_config = StackRunConfig(
|
||||
container_image=(image_name if build_config.image_type == LlamaStackImageType.CONTAINER.value else None),
|
||||
image_name=image_name,
|
||||
apis=apis,
|
||||
providers={},
|
||||
storage=StorageConfig(
|
||||
backends={
|
||||
"kv_default": SqliteKVStoreConfig(db_path=str(distro_dir / "kvstore.db")),
|
||||
"sql_default": SqliteSqlStoreConfig(db_path=str(distro_dir / "sql_store.db")),
|
||||
},
|
||||
stores=ServerStoresConfig(
|
||||
metadata=KVStoreReference(backend="kv_default", namespace="registry"),
|
||||
inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"),
|
||||
conversations=SqlStoreReference(backend="sql_default", table_name="openai_conversations"),
|
||||
),
|
||||
),
|
||||
external_providers_dir=build_config.external_providers_dir
|
||||
if build_config.external_providers_dir
|
||||
else EXTERNAL_PROVIDERS_DIR,
|
||||
)
|
||||
# build providers dict
|
||||
provider_registry = get_provider_registry(build_config)
|
||||
for api in apis:
|
||||
run_config.providers[api] = []
|
||||
providers = build_config.distribution_spec.providers[api]
|
||||
|
||||
for provider in providers:
|
||||
pid = provider.provider_type.split("::")[-1]
|
||||
|
||||
p = provider_registry[Api(api)][provider.provider_type]
|
||||
if p.deprecation_error:
|
||||
raise InvalidProviderError(p.deprecation_error)
|
||||
|
||||
try:
|
||||
config_type = instantiate_class_type(provider_registry[Api(api)][provider.provider_type].config_class)
|
||||
except (ModuleNotFoundError, ValueError) as exc:
|
||||
# HACK ALERT:
|
||||
# This code executes after building is done, the import cannot work since the
|
||||
# package is either available in the venv or container - not available on the host.
|
||||
# TODO: use a "is_external" flag in ProviderSpec to check if the provider is
|
||||
# external
|
||||
cprint(
|
||||
f"Failed to import provider {provider.provider_type} for API {api} - assuming it's external, skipping: {exc}",
|
||||
color="yellow",
|
||||
file=sys.stderr,
|
||||
)
|
||||
# Set config_type to None to avoid UnboundLocalError
|
||||
config_type = None
|
||||
|
||||
if config_type is not None and hasattr(config_type, "sample_run_config"):
|
||||
config = config_type.sample_run_config(__distro_dir__=f"~/.llama/distributions/{image_name}")
|
||||
else:
|
||||
config = {}
|
||||
|
||||
p_spec = Provider(
|
||||
provider_id=pid,
|
||||
provider_type=provider.provider_type,
|
||||
config=config,
|
||||
module=provider.module,
|
||||
)
|
||||
run_config.providers[api].append(p_spec)
|
||||
|
||||
run_config_file = build_dir / f"{image_name}-run.yaml"
|
||||
|
||||
with open(run_config_file, "w") as f:
|
||||
to_write = json.loads(run_config.model_dump_json())
|
||||
f.write(yaml.dump(to_write, sort_keys=False))
|
||||
|
||||
# Only print this message for non-container builds since it will be displayed before the
|
||||
# container is built
|
||||
# For non-container builds, the run.yaml is generated at the very end of the build process so it
|
||||
# makes sense to display this message
|
||||
if build_config.image_type != LlamaStackImageType.CONTAINER.value:
|
||||
cprint(f"You can now run your stack with `llama stack run {run_config_file}`", color="green", file=sys.stderr)
|
||||
return run_config_file
|
||||
|
||||
|
||||
@lru_cache
|
||||
def available_templates_specs() -> dict[str, BuildConfig]:
|
||||
import yaml
|
||||
|
||||
template_specs = {}
|
||||
for p in TEMPLATES_PATH.rglob("*build.yaml"):
|
||||
template_name = p.parent.name
|
||||
with open(p) as f:
|
||||
build_config = BuildConfig(**yaml.safe_load(f))
|
||||
template_specs[template_name] = build_config
|
||||
return template_specs
|
||||
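A sketch of how these helpers fit together when materializing a distribution config; the "starter" template name and the output directory are assumptions for illustration, not values from this diff:

from pathlib import Path

from llama_stack.cli.stack.utils import available_templates_specs, generate_run_config

# Hypothetical usage: pick a packaged template and render its run.yaml next to the build.
templates = available_templates_specs()            # {template_name: BuildConfig}
build_config = templates["starter"]                # assumed template name
build_dir = Path("/tmp/llama-build")               # assumed output directory
build_dir.mkdir(parents=True, exist_ok=True)
run_yaml = generate_run_config(build_config, build_dir, image_name="starter")
print(f"Wrote {run_yaml}")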
19
src/llama_stack/cli/subcommand.py
Normal file
@@ -0,0 +1,19 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.


class Subcommand:
    """All llama cli subcommands must inherit from this class"""

    def __init__(self, *args, **kwargs):
        pass

    @classmethod
    def create(cls, *args, **kwargs):
        return cls(*args, **kwargs)

    def _add_arguments(self):
        pass
|
||||
39
src/llama_stack/cli/table.py
Normal file
@@ -0,0 +1,39 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from collections.abc import Iterable

from rich.console import Console
from rich.table import Table


def print_table(rows, headers=None, separate_rows: bool = False, sort_by: Iterable[int] = tuple()):
    # Convert rows and handle None values
    rows = [[x or "" for x in row] for row in rows]

    # Sort rows if sort_by is specified
    if sort_by:
        rows.sort(key=lambda x: tuple(x[i] for i in sort_by))

    # Create Rich table
    table = Table(show_lines=separate_rows)

    # Add headers if provided
    if headers:
        for header in headers:
            table.add_column(header, style="bold white")
    else:
        # Add unnamed columns based on first row
        for _ in range(len(rows[0]) if rows else 0):
            table.add_column()

    # Add rows
    for row in rows:
        table.add_row(*row)

    # Print table
    console = Console()
    console.print(table)
|
||||
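A quick usage sketch for the helper above; the row values are invented examples, and sort_by=(0,) orders rows by the first column:

from llama_stack.cli.table import print_table

rows = [
    ["meta-llama/Llama-3.2-3B-Instruct", "remote::together"],   # invented example rows
    ["all-MiniLM-L6-v2", "inline::sentence-transformers"],
]
print_table(rows, headers=["Model", "Provider"], sort_by=(0,))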
29
src/llama_stack/cli/utils.py
Normal file
@@ -0,0 +1,29 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import argparse

from llama_stack.log import get_logger

logger = get_logger(name=__name__, category="cli")


# TODO: this can probably just be inlined now?
def add_config_distro_args(parser: argparse.ArgumentParser):
    """Add unified config/distro arguments."""
    group = parser.add_mutually_exclusive_group(required=True)

    group.add_argument(
        "config",
        nargs="?",
        help="Configuration file path or distribution name",
    )


def get_config_from_args(args: argparse.Namespace) -> str | None:
    if args.config is not None:
        return str(args.config)
    return None
|
||||
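A minimal sketch of these two helpers in use; the parent command name and the "starter" distro argument are assumptions for illustration:

import argparse

from llama_stack.cli.utils import add_config_distro_args, get_config_from_args

parser = argparse.ArgumentParser(prog="llama stack run")  # hypothetical parent command
add_config_distro_args(parser)
args = parser.parse_args(["starter"])                      # assumed distro name
print(get_config_from_args(args))                          # -> "starter"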
5
src/llama_stack/core/__init__.py
Normal file
@@ -0,0 +1,5 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
|
||||
5
src/llama_stack/core/access_control/__init__.py
Normal file
@@ -0,0 +1,5 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
|
||||
131
src/llama_stack/core/access_control/access_control.py
Normal file
@@ -0,0 +1,131 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import Any
|
||||
|
||||
from llama_stack.core.datatypes import User
|
||||
|
||||
from .conditions import (
|
||||
Condition,
|
||||
ProtectedResource,
|
||||
parse_conditions,
|
||||
)
|
||||
from .datatypes import (
|
||||
AccessRule,
|
||||
Action,
|
||||
Scope,
|
||||
)
|
||||
|
||||
|
||||
def matches_resource(resource_scope: str, actual_resource: str) -> bool:
|
||||
if resource_scope == actual_resource:
|
||||
return True
|
||||
return resource_scope.endswith("::*") and actual_resource.startswith(resource_scope[:-1])
|
||||
|
||||
|
||||
def matches_scope(
|
||||
scope: Scope,
|
||||
action: Action,
|
||||
resource: str,
|
||||
user: str | None,
|
||||
) -> bool:
|
||||
if scope.resource and not matches_resource(scope.resource, resource):
|
||||
return False
|
||||
if scope.principal and scope.principal != user:
|
||||
return False
|
||||
return action in scope.actions
|
||||
|
||||
|
||||
def as_list(obj: Any) -> list[Any]:
|
||||
if isinstance(obj, list):
|
||||
return obj
|
||||
return [obj]
|
||||
|
||||
|
||||
def matches_conditions(
|
||||
conditions: list[Condition],
|
||||
resource: ProtectedResource,
|
||||
user: User,
|
||||
) -> bool:
|
||||
for condition in conditions:
|
||||
# must match all conditions
|
||||
if not condition.matches(resource, user):
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def default_policy() -> list[AccessRule]:
|
||||
# for backwards compatibility, if no rules are provided, assume
|
||||
# full access subject to previous attribute matching rules
|
||||
return [
|
||||
AccessRule(
|
||||
permit=Scope(actions=list(Action)),
|
||||
when=["user in owners " + name for name in ["roles", "teams", "projects", "namespaces"]],
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
def is_action_allowed(
|
||||
policy: list[AccessRule],
|
||||
action: Action,
|
||||
resource: ProtectedResource,
|
||||
user: User | None,
|
||||
) -> bool:
|
||||
# If user is not set, assume authentication is not enabled
|
||||
if not user:
|
||||
return True
|
||||
|
||||
if not len(policy):
|
||||
policy = default_policy()
|
||||
|
||||
qualified_resource_id = f"{resource.type}::{resource.identifier}"
|
||||
for rule in policy:
|
||||
if rule.forbid and matches_scope(rule.forbid, action, qualified_resource_id, user.principal):
|
||||
if rule.when:
|
||||
if matches_conditions(parse_conditions(as_list(rule.when)), resource, user):
|
||||
return False
|
||||
elif rule.unless:
|
||||
if not matches_conditions(parse_conditions(as_list(rule.unless)), resource, user):
|
||||
return False
|
||||
else:
|
||||
return False
|
||||
elif rule.permit and matches_scope(rule.permit, action, qualified_resource_id, user.principal):
|
||||
if rule.when:
|
||||
if matches_conditions(parse_conditions(as_list(rule.when)), resource, user):
|
||||
return True
|
||||
elif rule.unless:
|
||||
if not matches_conditions(parse_conditions(as_list(rule.unless)), resource, user):
|
||||
return True
|
||||
else:
|
||||
return True
|
||||
# assume access is denied unless we find a rule that permits access
|
||||
return False
|
||||
|
||||
|
||||
class AccessDeniedError(RuntimeError):
|
||||
def __init__(self, action: str | None = None, resource: ProtectedResource | None = None, user: User | None = None):
|
||||
self.action = action
|
||||
self.resource = resource
|
||||
self.user = user
|
||||
|
||||
message = _build_access_denied_message(action, resource, user)
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
def _build_access_denied_message(action: str | None, resource: ProtectedResource | None, user: User | None) -> str:
|
||||
"""Build detailed error message for access denied scenarios."""
|
||||
if action and resource and user:
|
||||
resource_info = f"{resource.type}::{resource.identifier}"
|
||||
user_info = f"'{user.principal}'"
|
||||
if user.attributes:
|
||||
attrs = ", ".join([f"{k}={v}" for k, v in user.attributes.items()])
|
||||
user_info += f" (attributes: {attrs})"
|
||||
|
||||
message = f"User {user_info} cannot perform action '{action}' on resource '{resource_info}'"
|
||||
else:
|
||||
message = "Insufficient permissions"
|
||||
|
||||
return message
|
||||
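A rough sketch of how the policy evaluator above might be exercised; the dataclass stand-ins and attribute values are made up for illustration (real users and resources come from llama_stack.core.datatypes), but the expected result follows from the default policy:

from dataclasses import dataclass

from llama_stack.core.access_control.access_control import is_action_allowed
from llama_stack.core.access_control.datatypes import Action


@dataclass
class _FakeUser:  # stand-in satisfying the User protocol
    principal: str
    attributes: dict | None = None


@dataclass
class _FakeResource:  # stand-in satisfying the ProtectedResource protocol
    type: str
    identifier: str
    owner: _FakeUser | None = None


owner = _FakeUser("alice", {"teams": ["ml-infra"]})
reader = _FakeUser("bob", {"teams": ["ml-infra"]})
resource = _FakeResource("model", "my-model", owner)

# With an empty policy the default rule applies: access is granted when the user
# shares an owners attribute (roles/teams/projects/namespaces) with the resource owner.
print(is_action_allowed([], Action.READ, resource, reader))  # expected: True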
129
src/llama_stack/core/access_control/conditions.py
Normal file
@@ -0,0 +1,129 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import Protocol
|
||||
|
||||
|
||||
class User(Protocol):
|
||||
principal: str
|
||||
attributes: dict[str, list[str]] | None
|
||||
|
||||
|
||||
class ProtectedResource(Protocol):
|
||||
type: str
|
||||
identifier: str
|
||||
owner: User
|
||||
|
||||
|
||||
class Condition(Protocol):
|
||||
def matches(self, resource: ProtectedResource, user: User) -> bool: ...
|
||||
|
||||
|
||||
class UserInOwnersList:
|
||||
def __init__(self, name: str):
|
||||
self.name = name
|
||||
|
||||
def owners_values(self, resource: ProtectedResource) -> list[str] | None:
|
||||
if (
|
||||
hasattr(resource, "owner")
|
||||
and resource.owner
|
||||
and resource.owner.attributes
|
||||
and self.name in resource.owner.attributes
|
||||
):
|
||||
return resource.owner.attributes[self.name]
|
||||
else:
|
||||
return None
|
||||
|
||||
def matches(self, resource: ProtectedResource, user: User) -> bool:
|
||||
required = self.owners_values(resource)
|
||||
if not required:
|
||||
return True
|
||||
if not user.attributes or self.name not in user.attributes or not user.attributes[self.name]:
|
||||
return False
|
||||
user_values = user.attributes[self.name]
|
||||
for value in required:
|
||||
if value in user_values:
|
||||
return True
|
||||
return False
|
||||
|
||||
def __repr__(self):
|
||||
return f"user in owners {self.name}"
|
||||
|
||||
|
||||
class UserNotInOwnersList(UserInOwnersList):
|
||||
def __init__(self, name: str):
|
||||
super().__init__(name)
|
||||
|
||||
def matches(self, resource: ProtectedResource, user: User) -> bool:
|
||||
return not super().matches(resource, user)
|
||||
|
||||
def __repr__(self):
|
||||
return f"user not in owners {self.name}"
|
||||
|
||||
|
||||
class UserWithValueInList:
|
||||
def __init__(self, name: str, value: str):
|
||||
self.name = name
|
||||
self.value = value
|
||||
|
||||
def matches(self, resource: ProtectedResource, user: User) -> bool:
|
||||
if user.attributes and self.name in user.attributes:
|
||||
return self.value in user.attributes[self.name]
|
||||
print(f"User does not have {self.value} in {self.name}")
|
||||
return False
|
||||
|
||||
def __repr__(self):
|
||||
return f"user with {self.value} in {self.name}"
|
||||
|
||||
|
||||
class UserWithValueNotInList(UserWithValueInList):
|
||||
def __init__(self, name: str, value: str):
|
||||
super().__init__(name, value)
|
||||
|
||||
def matches(self, resource: ProtectedResource, user: User) -> bool:
|
||||
return not super().matches(resource, user)
|
||||
|
||||
def __repr__(self):
|
||||
return f"user with {self.value} not in {self.name}"
|
||||
|
||||
|
||||
class UserIsOwner:
|
||||
def matches(self, resource: ProtectedResource, user: User) -> bool:
|
||||
return resource.owner.principal == user.principal if resource.owner else False
|
||||
|
||||
def __repr__(self):
|
||||
return "user is owner"
|
||||
|
||||
|
||||
class UserIsNotOwner:
|
||||
def matches(self, resource: ProtectedResource, user: User) -> bool:
|
||||
return not resource.owner or resource.owner.principal != user.principal
|
||||
|
||||
def __repr__(self):
|
||||
return "user is not owner"
|
||||
|
||||
|
||||
def parse_condition(condition: str) -> Condition:
|
||||
words = condition.split()
|
||||
match words:
|
||||
case ["user", "is", "owner"]:
|
||||
return UserIsOwner()
|
||||
case ["user", "is", "not", "owner"]:
|
||||
return UserIsNotOwner()
|
||||
case ["user", "with", value, "in", name]:
|
||||
return UserWithValueInList(name, value)
|
||||
case ["user", "with", value, "not", "in", name]:
|
||||
return UserWithValueNotInList(name, value)
|
||||
case ["user", "in", "owners", name]:
|
||||
return UserInOwnersList(name)
|
||||
case ["user", "not", "in", "owners", name]:
|
||||
return UserNotInOwnersList(name)
|
||||
case _:
|
||||
raise ValueError(f"Invalid condition: {condition}")
|
||||
|
||||
|
||||
def parse_conditions(conditions: list[str]) -> list[Condition]:
|
||||
return [parse_condition(c) for c in conditions]
|
||||
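A quick illustration of the condition grammar accepted by parse_condition; the strings below use the supported forms listed in the match-case arms above:

from llama_stack.core.access_control.conditions import parse_condition

print(parse_condition("user is owner"))              # -> user is owner
print(parse_condition("user with admin in roles"))   # -> user with admin in roles
print(parse_condition("user in owners teams"))       # -> user in owners teams

try:
    parse_condition("user likes owners")              # unsupported form
except ValueError as e:
    print(e)                                          # Invalid condition: user likes owners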
107
src/llama_stack/core/access_control/datatypes.py
Normal file
@@ -0,0 +1,107 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from enum import StrEnum
|
||||
from typing import Self
|
||||
|
||||
from pydantic import BaseModel, model_validator
|
||||
|
||||
from .conditions import parse_conditions
|
||||
|
||||
|
||||
class Action(StrEnum):
|
||||
CREATE = "create"
|
||||
READ = "read"
|
||||
UPDATE = "update"
|
||||
DELETE = "delete"
|
||||
|
||||
|
||||
class Scope(BaseModel):
|
||||
principal: str | None = None
|
||||
actions: Action | list[Action]
|
||||
resource: str | None = None
|
||||
|
||||
|
||||
def _mutually_exclusive(obj, a: str, b: str):
|
||||
if getattr(obj, a) and getattr(obj, b):
|
||||
raise ValueError(f"{a} and {b} are mutually exclusive")
|
||||
|
||||
|
||||
def _require_one_of(obj, a: str, b: str):
|
||||
if not getattr(obj, a) and not getattr(obj, b):
|
||||
raise ValueError(f"on of {a} or {b} is required")
|
||||
|
||||
|
||||
class AccessRule(BaseModel):
|
||||
"""Access rule based loosely on cedar policy language
|
||||
|
||||
A rule defines a list of actions, either to permit or to forbid. It may specify a
|
||||
principal or a resource that must match for the rule to take effect. The resource
|
||||
to match should be specified in the form of a type qualified identifier, e.g.
|
||||
model::my-model or vector_store::some-db, or a wildcard for all resources of a type,
|
||||
e.g. model::*. If the principal or resource are not specified, they will match all
|
||||
requests.
|
||||
|
||||
A rule may also specify a condition, either a 'when' or an 'unless', with additional
|
||||
constraints as to where the rule applies. The constraints supported at present are:
|
||||
|
||||
- 'user with <attr-value> in <attr-name>'
|
||||
- 'user with <attr-value> not in <attr-name>'
|
||||
- 'user is owner'
|
||||
- 'user is not owner'
|
||||
- 'user in owners <attr-name>'
|
||||
- 'user not in owners <attr-name>'
|
||||
|
||||
Rules are tested in order to find a match. If a match is found, the request is
|
||||
permitted or forbidden depending on the type of rule. If no match is found, the
|
||||
request is denied. If no rules are specified, a rule that allows any action as
|
||||
long as the resource attributes match the user attributes is added
|
||||
(i.e. the previous behaviour is the default).
|
||||
|
||||
Some examples in yaml:
|
||||
|
||||
- permit:
|
||||
principal: user-1
|
||||
actions: [create, read, delete]
|
||||
resource: model::*
|
||||
description: user-1 has full access to all models
|
||||
- permit:
|
||||
principal: user-2
|
||||
actions: [read]
|
||||
resource: model::model-1
|
||||
description: user-2 has read access to model-1 only
|
||||
- permit:
|
||||
actions: [read]
|
||||
when: user in owners teams
|
||||
description: any user has read access to any resource created by a member of their team
|
||||
- forbid:
|
||||
actions: [create, read, delete]
|
||||
resource: vector_store::*
|
||||
unless: user with admin in roles
|
||||
description: only user with admin role can use vector_store resources
|
||||
|
||||
"""
|
||||
|
||||
permit: Scope | None = None
|
||||
forbid: Scope | None = None
|
||||
when: str | list[str] | None = None
|
||||
unless: str | list[str] | None = None
|
||||
description: str | None = None
|
||||
|
||||
@model_validator(mode="after")
|
||||
def validate_rule_format(self) -> Self:
|
||||
_require_one_of(self, "permit", "forbid")
|
||||
_mutually_exclusive(self, "permit", "forbid")
|
||||
_mutually_exclusive(self, "when", "unless")
|
||||
if isinstance(self.when, list):
|
||||
parse_conditions(self.when)
|
||||
elif self.when:
|
||||
parse_conditions([self.when])
|
||||
if isinstance(self.unless, list):
|
||||
parse_conditions(self.unless)
|
||||
elif self.unless:
|
||||
parse_conditions([self.unless])
|
||||
return self
|
||||
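For instance, the first YAML example in the docstring corresponds to roughly this Pydantic construction; the field values mirror the docstring rather than introducing new behaviour:

from llama_stack.core.access_control.datatypes import AccessRule, Action, Scope

# Equivalent of the "user-1 has full access to all models" YAML rule.
rule = AccessRule(
    permit=Scope(
        principal="user-1",
        actions=[Action.CREATE, Action.READ, Action.DELETE],
        resource="model::*",
    ),
    description="user-1 has full access to all models",
)
print(rule.model_dump(exclude_none=True))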
164
src/llama_stack/core/build.py
Normal file
@@ -0,0 +1,164 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import importlib.resources
|
||||
import sys
|
||||
|
||||
from pydantic import BaseModel
|
||||
from termcolor import cprint
|
||||
|
||||
from llama_stack.core.datatypes import BuildConfig
|
||||
from llama_stack.core.distribution import get_provider_registry
|
||||
from llama_stack.core.external import load_external_apis
|
||||
from llama_stack.core.utils.exec import run_command
|
||||
from llama_stack.core.utils.image_types import LlamaStackImageType
|
||||
from llama_stack.distributions.template import DistributionTemplate
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.datatypes import Api
|
||||
|
||||
log = get_logger(name=__name__, category="core")
|
||||
|
||||
# These are the dependencies needed by the distribution server.
|
||||
# `llama-stack` is automatically installed by the installation script.
|
||||
SERVER_DEPENDENCIES = [
|
||||
"aiosqlite",
|
||||
"fastapi",
|
||||
"fire",
|
||||
"httpx",
|
||||
"uvicorn",
|
||||
"opentelemetry-sdk",
|
||||
"opentelemetry-exporter-otlp-proto-http",
|
||||
]
|
||||
|
||||
|
||||
class ApiInput(BaseModel):
|
||||
api: Api
|
||||
provider: str
|
||||
|
||||
|
||||
def get_provider_dependencies(
|
||||
config: BuildConfig | DistributionTemplate,
|
||||
) -> tuple[list[str], list[str], list[str]]:
|
||||
"""Get normal and special dependencies from provider configuration."""
|
||||
if isinstance(config, DistributionTemplate):
|
||||
config = config.build_config()
|
||||
|
||||
providers = config.distribution_spec.providers
|
||||
additional_pip_packages = config.additional_pip_packages
|
||||
|
||||
deps = []
|
||||
external_provider_deps = []
|
||||
registry = get_provider_registry(config)
|
||||
for api_str, provider_or_providers in providers.items():
|
||||
providers_for_api = registry[Api(api_str)]
|
||||
|
||||
providers = provider_or_providers if isinstance(provider_or_providers, list) else [provider_or_providers]
|
||||
|
||||
for provider in providers:
|
||||
# Providers from BuildConfig and RunConfig are subtly different - not great
|
||||
provider_type = provider if isinstance(provider, str) else provider.provider_type
|
||||
|
||||
if provider_type not in providers_for_api:
|
||||
raise ValueError(f"Provider `{provider}` is not available for API `{api_str}`")
|
||||
|
||||
provider_spec = providers_for_api[provider_type]
|
||||
if hasattr(provider_spec, "is_external") and provider_spec.is_external:
|
||||
# this ensures we install the top level module for our external providers
|
||||
if provider_spec.module:
|
||||
if isinstance(provider_spec.module, str):
|
||||
external_provider_deps.append(provider_spec.module)
|
||||
else:
|
||||
external_provider_deps.extend(provider_spec.module)
|
||||
if hasattr(provider_spec, "pip_packages"):
|
||||
deps.extend(provider_spec.pip_packages)
|
||||
if hasattr(provider_spec, "container_image") and provider_spec.container_image:
|
||||
raise ValueError("A stack's dependencies cannot have a container image")
|
||||
|
||||
normal_deps = []
|
||||
special_deps = []
|
||||
for package in deps:
|
||||
if any(f in package for f in ["--no-deps", "--index-url", "--extra-index-url"]):
|
||||
special_deps.append(package)
|
||||
else:
|
||||
normal_deps.append(package)
|
||||
|
||||
normal_deps.extend(additional_pip_packages or [])
|
||||
|
||||
return list(set(normal_deps)), list(set(special_deps)), list(set(external_provider_deps))
|
||||
|
||||
|
||||
def print_pip_install_help(config: BuildConfig):
|
||||
normal_deps, special_deps, _ = get_provider_dependencies(config)
|
||||
|
||||
cprint(
|
||||
f"Please install needed dependencies using the following commands:\n\nuv pip install {' '.join(normal_deps)}",
|
||||
color="yellow",
|
||||
file=sys.stderr,
|
||||
)
|
||||
for special_dep in special_deps:
|
||||
cprint(f"uv pip install {special_dep}", color="yellow", file=sys.stderr)
|
||||
print()
|
||||
|
||||
|
||||
def build_image(
|
||||
build_config: BuildConfig,
|
||||
image_name: str,
|
||||
distro_or_config: str,
|
||||
run_config: str | None = None,
|
||||
):
|
||||
container_base = build_config.distribution_spec.container_image or "python:3.12-slim"
|
||||
|
||||
normal_deps, special_deps, external_provider_deps = get_provider_dependencies(build_config)
|
||||
normal_deps += SERVER_DEPENDENCIES
|
||||
if build_config.external_apis_dir:
|
||||
external_apis = load_external_apis(build_config)
|
||||
if external_apis:
|
||||
for _, api_spec in external_apis.items():
|
||||
normal_deps.extend(api_spec.pip_packages)
|
||||
|
||||
if build_config.image_type == LlamaStackImageType.CONTAINER.value:
|
||||
script = str(importlib.resources.files("llama_stack") / "core/build_container.sh")
|
||||
args = [
|
||||
script,
|
||||
"--distro-or-config",
|
||||
distro_or_config,
|
||||
"--image-name",
|
||||
image_name,
|
||||
"--container-base",
|
||||
container_base,
|
||||
"--normal-deps",
|
||||
" ".join(normal_deps),
|
||||
]
|
||||
# When building from a config file (not a template), include the run config path in the
|
||||
# build arguments
|
||||
if run_config is not None:
|
||||
args.extend(["--run-config", run_config])
|
||||
else:
|
||||
script = str(importlib.resources.files("llama_stack") / "core/build_venv.sh")
|
||||
args = [
|
||||
script,
|
||||
"--env-name",
|
||||
str(image_name),
|
||||
"--normal-deps",
|
||||
" ".join(normal_deps),
|
||||
]
|
||||
|
||||
# Always pass both arguments, even if empty, to maintain consistent positional arguments
|
||||
if special_deps:
|
||||
args.extend(["--optional-deps", "#".join(special_deps)])
|
||||
if external_provider_deps:
|
||||
args.extend(
|
||||
["--external-provider-deps", "#".join(external_provider_deps)]
|
||||
) # the script will install external provider module, get its deps, and install those too.
|
||||
|
||||
return_code = run_command(args)
|
||||
|
||||
if return_code != 0:
|
||||
log.error(
|
||||
f"Failed to build target {image_name} with return code {return_code}",
|
||||
)
|
||||
|
||||
return return_code
|
||||
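As a small illustration of the normal/special dependency split performed in get_provider_dependencies above; the package strings are invented examples, not taken from any provider spec:

# Packages carrying pip flags are routed to special_deps and installed one by one.
deps = ["aiosqlite", "httpx", "torch --index-url https://download.pytorch.org/whl/cpu"]
special = [d for d in deps if any(f in d for f in ["--no-deps", "--index-url", "--extra-index-url"])]
normal = [d for d in deps if d not in special]
print(normal)   # ['aiosqlite', 'httpx']
print(special)  # ['torch --index-url https://download.pytorch.org/whl/cpu']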
205
src/llama_stack/core/client.py
Normal file
@@ -0,0 +1,205 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import inspect
|
||||
import json
|
||||
import sys
|
||||
from collections.abc import AsyncIterator
|
||||
from enum import Enum
|
||||
from typing import Any, Union, get_args, get_origin
|
||||
|
||||
import httpx
|
||||
from pydantic import BaseModel, parse_obj_as
|
||||
from termcolor import cprint
|
||||
|
||||
from llama_stack.providers.datatypes import RemoteProviderConfig
|
||||
|
||||
_CLIENT_CLASSES = {}
|
||||
|
||||
|
||||
async def get_client_impl(protocol, config: RemoteProviderConfig, _deps: Any):
|
||||
client_class = create_api_client_class(protocol)
|
||||
impl = client_class(config.url)
|
||||
await impl.initialize()
|
||||
return impl
|
||||
|
||||
|
||||
def create_api_client_class(protocol) -> type:
|
||||
if protocol in _CLIENT_CLASSES:
|
||||
return _CLIENT_CLASSES[protocol]
|
||||
|
||||
class APIClient:
|
||||
def __init__(self, base_url: str):
|
||||
print(f"({protocol.__name__}) Connecting to {base_url}")
|
||||
self.base_url = base_url.rstrip("/")
|
||||
self.routes = {}
|
||||
|
||||
# Store routes for this protocol
|
||||
for name, method in inspect.getmembers(protocol):
|
||||
if hasattr(method, "__webmethod__"):
|
||||
sig = inspect.signature(method)
|
||||
self.routes[name] = (method.__webmethod__, sig)
|
||||
|
||||
async def initialize(self):
|
||||
pass
|
||||
|
||||
async def shutdown(self):
|
||||
pass
|
||||
|
||||
async def __acall__(self, method_name: str, *args, **kwargs) -> Any:
|
||||
assert method_name in self.routes, f"Unknown endpoint: {method_name}"
|
||||
|
||||
# TODO: make this more precise, same thing needs to happen in server.py
|
||||
is_streaming = kwargs.get("stream", False)
|
||||
if is_streaming:
|
||||
return self._call_streaming(method_name, *args, **kwargs)
|
||||
else:
|
||||
return await self._call_non_streaming(method_name, *args, **kwargs)
|
||||
|
||||
async def _call_non_streaming(self, method_name: str, *args, **kwargs) -> Any:
|
||||
_, sig = self.routes[method_name]
|
||||
|
||||
if sig.return_annotation is None:
|
||||
return_type = None
|
||||
else:
|
||||
return_type = extract_non_async_iterator_type(sig.return_annotation)
|
||||
assert return_type, f"Could not extract return type for {sig.return_annotation}"
|
||||
|
||||
async with httpx.AsyncClient() as client:
|
||||
params = self.httpx_request_params(method_name, *args, **kwargs)
|
||||
response = await client.request(**params)
|
||||
response.raise_for_status()
|
||||
|
||||
j = response.json()
|
||||
if j is None:
|
||||
return None
|
||||
# print(f"({protocol.__name__}) Returning {j}, type {return_type}")
|
||||
return parse_obj_as(return_type, j)
|
||||
|
||||
async def _call_streaming(self, method_name: str, *args, **kwargs) -> Any:
|
||||
webmethod, sig = self.routes[method_name]
|
||||
|
||||
return_type = extract_async_iterator_type(sig.return_annotation)
|
||||
assert return_type, f"Could not extract return type for {sig.return_annotation}"
|
||||
|
||||
async with httpx.AsyncClient() as client:
|
||||
params = self.httpx_request_params(method_name, *args, **kwargs)
|
||||
async with client.stream(**params) as response:
|
||||
response.raise_for_status()
|
||||
|
||||
async for line in response.aiter_lines():
|
||||
if line.startswith("data:"):
|
||||
data = line[len("data: ") :]
|
||||
try:
|
||||
data = json.loads(data)
|
||||
if "error" in data:
|
||||
cprint(data, color="red", file=sys.stderr)
|
||||
continue
|
||||
|
||||
yield parse_obj_as(return_type, data)
|
||||
except Exception as e:
|
||||
cprint(f"Error with parsing or validation: {e}", color="red", file=sys.stderr)
|
||||
cprint(data, color="red", file=sys.stderr)
|
||||
|
||||
def httpx_request_params(self, method_name: str, *args, **kwargs) -> dict:
|
||||
webmethod, sig = self.routes[method_name]
|
||||
|
||||
parameters = list(sig.parameters.values())[1:] # skip `self`
|
||||
for i, param in enumerate(parameters):
|
||||
if i >= len(args):
|
||||
break
|
||||
kwargs[param.name] = args[i]
|
||||
|
||||
# Get all webmethods for this method (supports multiple decorators)
|
||||
webmethods = getattr(method, "__webmethods__", [])
|
||||
|
||||
if not webmethods:
|
||||
raise RuntimeError(f"Method {method} has no webmethod decorators")
|
||||
|
||||
# Choose the preferred webmethod (non-deprecated if available)
|
||||
preferred_webmethod = None
|
||||
for wm in webmethods:
|
||||
if not getattr(wm, "deprecated", False):
|
||||
preferred_webmethod = wm
|
||||
break
|
||||
|
||||
# If no non-deprecated found, use the first one
|
||||
if preferred_webmethod is None:
|
||||
preferred_webmethod = webmethods[0]
|
||||
|
||||
url = f"{self.base_url}/{preferred_webmethod.level}/{preferred_webmethod.route.lstrip('/')}"
|
||||
|
||||
def convert(value):
|
||||
if isinstance(value, list):
|
||||
return [convert(v) for v in value]
|
||||
elif isinstance(value, dict):
|
||||
return {k: convert(v) for k, v in value.items()}
|
||||
elif isinstance(value, BaseModel):
|
||||
return json.loads(value.model_dump_json())
|
||||
elif isinstance(value, Enum):
|
||||
return value.value
|
||||
else:
|
||||
return value
|
||||
|
||||
params = {}
|
||||
data = {}
|
||||
if webmethod.method == "GET":
|
||||
params.update(kwargs)
|
||||
else:
|
||||
data.update(convert(kwargs))
|
||||
|
||||
ret = dict(
|
||||
method=webmethod.method or "POST",
|
||||
url=url,
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
timeout=30,
|
||||
)
|
||||
if params:
|
||||
ret["params"] = params
|
||||
if data:
|
||||
ret["json"] = data
|
||||
|
||||
return ret
|
||||
|
||||
# Add protocol methods to the wrapper
|
||||
for name, method in inspect.getmembers(protocol):
|
||||
if hasattr(method, "__webmethod__"):
|
||||
|
||||
async def method_impl(self, *args, method_name=name, **kwargs):
|
||||
return await self.__acall__(method_name, *args, **kwargs)
|
||||
|
||||
method_impl.__name__ = name
|
||||
method_impl.__qualname__ = f"APIClient.{name}"
|
||||
method_impl.__signature__ = inspect.signature(method)
|
||||
setattr(APIClient, name, method_impl)
|
||||
|
||||
# Name the class after the protocol
|
||||
APIClient.__name__ = f"{protocol.__name__}Client"
|
||||
_CLIENT_CLASSES[protocol] = APIClient
|
||||
return APIClient
|
||||
|
||||
|
||||
# not quite general these methods are
|
||||
def extract_non_async_iterator_type(type_hint):
|
||||
if get_origin(type_hint) is Union:
|
||||
args = get_args(type_hint)
|
||||
for arg in args:
|
||||
if not issubclass(get_origin(arg) or arg, AsyncIterator):
|
||||
return arg
|
||||
return type_hint
|
||||
|
||||
|
||||
def extract_async_iterator_type(type_hint):
|
||||
if get_origin(type_hint) is Union:
|
||||
args = get_args(type_hint)
|
||||
for arg in args:
|
||||
if issubclass(get_origin(arg) or arg, AsyncIterator):
|
||||
inner_args = get_args(arg)
|
||||
return inner_args[0]
|
||||
return None
|
||||
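To make the two type-hint extractors above concrete, here is how they behave on a return annotation of the shape used by streaming endpoints; the ChatResponse/ChatChunk names are placeholders, not real response models:

from collections.abc import AsyncIterator
from typing import Union

from llama_stack.core.client import extract_async_iterator_type, extract_non_async_iterator_type


class ChatResponse: ...   # placeholder payload type


class ChatChunk: ...      # placeholder streaming chunk type


hint = Union[ChatResponse, AsyncIterator[ChatChunk]]
print(extract_non_async_iterator_type(hint))  # -> ChatResponse
print(extract_async_iterator_type(hint))      # -> ChatChunk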
37
src/llama_stack/core/common.sh
Executable file
@@ -0,0 +1,37 @@
#!/usr/bin/env bash
|
||||
|
||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
cleanup() {
|
||||
# For venv environments, no special cleanup is needed
|
||||
# This function exists to avoid "function not found" errors
|
||||
local env_name="$1"
|
||||
echo "Cleanup called for environment: $env_name"
|
||||
}
|
||||
|
||||
handle_int() {
|
||||
if [ -n "$ENVNAME" ]; then
|
||||
cleanup "$ENVNAME"
|
||||
fi
|
||||
exit 1
|
||||
}
|
||||
|
||||
handle_exit() {
|
||||
if [ $? -ne 0 ]; then
|
||||
echo -e "\033[1;31mABORTING.\033[0m"
|
||||
if [ -n "$ENVNAME" ]; then
|
||||
cleanup "$ENVNAME"
|
||||
fi
|
||||
fi
|
||||
}
|
||||
|
||||
|
||||
|
||||
# check if a command is present
|
||||
is_command_available() {
|
||||
command -v "$1" &>/dev/null
|
||||
}
|
||||
212
src/llama_stack/core/configure.py
Normal file
@@ -0,0 +1,212 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
import textwrap
|
||||
from typing import Any
|
||||
|
||||
from llama_stack.core.datatypes import (
|
||||
LLAMA_STACK_RUN_CONFIG_VERSION,
|
||||
DistributionSpec,
|
||||
Provider,
|
||||
StackRunConfig,
|
||||
)
|
||||
from llama_stack.core.distribution import (
|
||||
builtin_automatically_routed_apis,
|
||||
get_provider_registry,
|
||||
)
|
||||
from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars
|
||||
from llama_stack.core.utils.config_dirs import EXTERNAL_PROVIDERS_DIR
|
||||
from llama_stack.core.utils.dynamic import instantiate_class_type
|
||||
from llama_stack.core.utils.prompt_for_config import prompt_for_config
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.datatypes import Api, ProviderSpec
|
||||
|
||||
logger = get_logger(name=__name__, category="core")
|
||||
|
||||
|
||||
def configure_single_provider(registry: dict[str, ProviderSpec], provider: Provider) -> Provider:
|
||||
provider_spec = registry[provider.provider_type]
|
||||
config_type = instantiate_class_type(provider_spec.config_class)
|
||||
try:
|
||||
if provider.config:
|
||||
existing = config_type(**provider.config)
|
||||
else:
|
||||
existing = None
|
||||
except Exception:
|
||||
existing = None
|
||||
|
||||
cfg = prompt_for_config(config_type, existing)
|
||||
return Provider(
|
||||
provider_id=provider.provider_id,
|
||||
provider_type=provider.provider_type,
|
||||
config=cfg.model_dump(),
|
||||
)
|
||||
|
||||
|
||||
def configure_api_providers(config: StackRunConfig, build_spec: DistributionSpec) -> StackRunConfig:
|
||||
is_nux = len(config.providers) == 0
|
||||
|
||||
if is_nux:
|
||||
logger.info(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
Llama Stack is composed of several APIs working together. For each API served by the Stack,
|
||||
we need to configure the providers (implementations) you want to use for these APIs.
|
||||
"""
|
||||
)
|
||||
)
|
||||
|
||||
provider_registry = get_provider_registry()
|
||||
builtin_apis = [a.routing_table_api for a in builtin_automatically_routed_apis()]
|
||||
|
||||
if config.apis:
|
||||
apis_to_serve = config.apis
|
||||
else:
|
||||
apis_to_serve = [a.value for a in Api if a not in (Api.inspect, Api.providers)]
|
||||
|
||||
for api_str in apis_to_serve:
|
||||
api = Api(api_str)
|
||||
if api in builtin_apis:
|
||||
continue
|
||||
if api not in provider_registry:
|
||||
raise ValueError(f"Unknown API `{api_str}`")
|
||||
|
||||
existing_providers = config.providers.get(api_str, [])
|
||||
if existing_providers:
|
||||
logger.info(f"Re-configuring existing providers for API `{api_str}`...")
|
||||
updated_providers = []
|
||||
for p in existing_providers:
|
||||
logger.info(f"> Configuring provider `({p.provider_type})`")
|
||||
updated_providers.append(configure_single_provider(provider_registry[api], p))
|
||||
logger.info("")
|
||||
else:
|
||||
# we are newly configuring this API
|
||||
plist = build_spec.providers.get(api_str, [])
|
||||
plist = plist if isinstance(plist, list) else [plist]
|
||||
|
||||
if not plist:
|
||||
raise ValueError(f"No provider configured for API {api_str}?")
|
||||
|
||||
logger.info(f"Configuring API `{api_str}`...")
|
||||
updated_providers = []
|
||||
for i, provider in enumerate(plist):
|
||||
if i >= 1:
|
||||
others = ", ".join(p.provider_type for p in plist[i:])
|
||||
logger.info(
|
||||
f"Not configuring other providers ({others}) interactively. Please edit the resulting YAML directly.\n"
|
||||
)
|
||||
break
|
||||
|
||||
logger.info(f"> Configuring provider `({provider.provider_type})`")
|
||||
pid = provider.provider_type.split("::")[-1]
|
||||
updated_providers.append(
|
||||
configure_single_provider(
|
||||
provider_registry[api],
|
||||
Provider(
|
||||
provider_id=(f"{pid}-{i:02d}" if len(plist) > 1 else pid),
|
||||
provider_type=provider.provider_type,
|
||||
config={},
|
||||
),
|
||||
)
|
||||
)
|
||||
logger.info("")
|
||||
|
||||
config.providers[api_str] = updated_providers
|
||||
|
||||
return config
|
||||
|
||||
|
||||
def upgrade_from_routing_table(
|
||||
config_dict: dict[str, Any],
|
||||
) -> dict[str, Any]:
|
||||
def get_providers(entries):
|
||||
return [
|
||||
Provider(
|
||||
provider_id=(f"{entry['provider_type']}-{i:02d}" if len(entries) > 1 else entry["provider_type"]),
|
||||
provider_type=entry["provider_type"],
|
||||
config=entry["config"],
|
||||
)
|
||||
for i, entry in enumerate(entries)
|
||||
]
|
||||
|
||||
providers_by_api = {}
|
||||
|
||||
routing_table = config_dict.get("routing_table", {})
|
||||
for api_str, entries in routing_table.items():
|
||||
providers = get_providers(entries)
|
||||
providers_by_api[api_str] = providers
|
||||
|
||||
provider_map = config_dict.get("api_providers", config_dict.get("provider_map", {}))
|
||||
if provider_map:
|
||||
for api_str, provider in provider_map.items():
|
||||
if isinstance(provider, dict) and "provider_type" in provider:
|
||||
providers_by_api[api_str] = [
|
||||
Provider(
|
||||
provider_id=f"{provider['provider_type']}",
|
||||
provider_type=provider["provider_type"],
|
||||
config=provider["config"],
|
||||
)
|
||||
]
|
||||
|
||||
config_dict["providers"] = providers_by_api
|
||||
|
||||
config_dict.pop("routing_table", None)
|
||||
config_dict.pop("api_providers", None)
|
||||
config_dict.pop("provider_map", None)
|
||||
|
||||
config_dict["apis"] = config_dict["apis_to_serve"]
|
||||
config_dict.pop("apis_to_serve", None)
|
||||
|
||||
# Add default storage config if not present
|
||||
if "storage" not in config_dict:
|
||||
config_dict["storage"] = {
|
||||
"backends": {
|
||||
"kv_default": {
|
||||
"type": "kv_sqlite",
|
||||
"db_path": "~/.llama/kvstore.db",
|
||||
},
|
||||
"sql_default": {
|
||||
"type": "sql_sqlite",
|
||||
"db_path": "~/.llama/sql_store.db",
|
||||
},
|
||||
},
|
||||
"stores": {
|
||||
"metadata": {
|
||||
"namespace": "registry",
|
||||
"backend": "kv_default",
|
||||
},
|
||||
"inference": {
|
||||
"table_name": "inference_store",
|
||||
"backend": "sql_default",
|
||||
"max_write_queue_size": 10000,
|
||||
"num_writers": 4,
|
||||
},
|
||||
"conversations": {
|
||||
"table_name": "openai_conversations",
|
||||
"backend": "sql_default",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
return config_dict
|
||||
|
||||
|
||||
def parse_and_maybe_upgrade_config(config_dict: dict[str, Any]) -> StackRunConfig:
|
||||
version = config_dict.get("version", None)
|
||||
if version == LLAMA_STACK_RUN_CONFIG_VERSION:
|
||||
processed_config_dict = replace_env_vars(config_dict)
|
||||
return StackRunConfig(**cast_image_name_to_string(processed_config_dict))
|
||||
|
||||
if "routing_table" in config_dict:
|
||||
logger.info("Upgrading config...")
|
||||
config_dict = upgrade_from_routing_table(config_dict)
|
||||
|
||||
config_dict["version"] = LLAMA_STACK_RUN_CONFIG_VERSION
|
||||
|
||||
if not config_dict.get("external_providers_dir", None):
|
||||
config_dict["external_providers_dir"] = EXTERNAL_PROVIDERS_DIR
|
||||
|
||||
processed_config_dict = replace_env_vars(config_dict)
|
||||
return StackRunConfig(**cast_image_name_to_string(processed_config_dict))
|
||||
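A sketch of how a legacy config gets upgraded in practice; the input dict is a minimal made-up example of the old routing_table format:

from llama_stack.core.configure import upgrade_from_routing_table

legacy = {
    "apis_to_serve": ["inference"],
    "routing_table": {
        "inference": [{"provider_type": "remote::ollama", "config": {"url": "http://localhost:11434"}}],
    },
}
upgraded = upgrade_from_routing_table(legacy)
print(upgraded["apis"])                                      # ['inference']
print(upgraded["providers"]["inference"][0].provider_type)   # remote::ollama
# A default storage block (kv_default / sql_default backends) is also filled in.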
5
src/llama_stack/core/conversations/__init__.py
Normal file
@@ -0,0 +1,5 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
|
||||
314
src/llama_stack/core/conversations/conversations.py
Normal file
@@ -0,0 +1,314 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import secrets
|
||||
import time
|
||||
from typing import Any, Literal
|
||||
|
||||
from pydantic import BaseModel, TypeAdapter
|
||||
|
||||
from llama_stack.apis.conversations.conversations import (
|
||||
Conversation,
|
||||
ConversationDeletedResource,
|
||||
ConversationItem,
|
||||
ConversationItemDeletedResource,
|
||||
ConversationItemInclude,
|
||||
ConversationItemList,
|
||||
Conversations,
|
||||
Metadata,
|
||||
)
|
||||
from llama_stack.core.datatypes import AccessRule, StackRunConfig
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
|
||||
from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore
|
||||
from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl
|
||||
|
||||
logger = get_logger(name=__name__, category="openai_conversations")
|
||||
|
||||
|
||||
class ConversationServiceConfig(BaseModel):
|
||||
"""Configuration for the built-in conversation service.
|
||||
|
||||
:param run_config: Stack run configuration for resolving persistence
|
||||
:param policy: Access control rules
|
||||
"""
|
||||
|
||||
run_config: StackRunConfig
|
||||
policy: list[AccessRule] = []
|
||||
|
||||
|
||||
async def get_provider_impl(config: ConversationServiceConfig, deps: dict[Any, Any]):
|
||||
"""Get the conversation service implementation."""
|
||||
impl = ConversationServiceImpl(config, deps)
|
||||
await impl.initialize()
|
||||
return impl
|
||||
|
||||
|
||||
class ConversationServiceImpl(Conversations):
|
||||
"""Built-in conversation service implementation using AuthorizedSqlStore."""
|
||||
|
||||
def __init__(self, config: ConversationServiceConfig, deps: dict[Any, Any]):
|
||||
self.config = config
|
||||
self.deps = deps
|
||||
self.policy = config.policy
|
||||
|
||||
# Use conversations store reference from run config
|
||||
conversations_ref = config.run_config.storage.stores.conversations
|
||||
if not conversations_ref:
|
||||
raise ValueError("storage.stores.conversations must be configured in run config")
|
||||
|
||||
base_sql_store = sqlstore_impl(conversations_ref)
|
||||
self.sql_store = AuthorizedSqlStore(base_sql_store, self.policy)
|
||||
|
||||
async def initialize(self) -> None:
|
||||
"""Initialize the store and create tables."""
|
||||
await self.sql_store.create_table(
|
||||
"openai_conversations",
|
||||
{
|
||||
"id": ColumnDefinition(type=ColumnType.STRING, primary_key=True),
|
||||
"created_at": ColumnType.INTEGER,
|
||||
"items": ColumnType.JSON,
|
||||
"metadata": ColumnType.JSON,
|
||||
},
|
||||
)
|
||||
|
||||
await self.sql_store.create_table(
|
||||
"conversation_items",
|
||||
{
|
||||
"id": ColumnDefinition(type=ColumnType.STRING, primary_key=True),
|
||||
"conversation_id": ColumnType.STRING,
|
||||
"created_at": ColumnType.INTEGER,
|
||||
"item_data": ColumnType.JSON,
|
||||
},
|
||||
)
|
||||
|
||||
async def create_conversation(
|
||||
self, items: list[ConversationItem] | None = None, metadata: Metadata | None = None
|
||||
) -> Conversation:
|
||||
"""Create a conversation."""
|
||||
random_bytes = secrets.token_bytes(24)
|
||||
conversation_id = f"conv_{random_bytes.hex()}"
|
||||
created_at = int(time.time())
|
||||
|
||||
record_data = {
|
||||
"id": conversation_id,
|
||||
"created_at": created_at,
|
||||
"items": [],
|
||||
"metadata": metadata,
|
||||
}
|
||||
|
||||
await self.sql_store.insert(
|
||||
table="openai_conversations",
|
||||
data=record_data,
|
||||
)
|
||||
|
||||
if items:
|
||||
item_records = []
|
||||
for item in items:
|
||||
item_dict = item.model_dump()
|
||||
item_id = self._get_or_generate_item_id(item, item_dict)
|
||||
|
||||
item_record = {
|
||||
"id": item_id,
|
||||
"conversation_id": conversation_id,
|
||||
"created_at": created_at,
|
||||
"item_data": item_dict,
|
||||
}
|
||||
|
||||
item_records.append(item_record)
|
||||
|
||||
await self.sql_store.insert(table="conversation_items", data=item_records)
|
||||
|
||||
conversation = Conversation(
|
||||
id=conversation_id,
|
||||
created_at=created_at,
|
||||
metadata=metadata,
|
||||
object="conversation",
|
||||
)
|
||||
|
||||
logger.debug(f"Created conversation {conversation_id}")
|
||||
return conversation
|
||||
|
||||
async def get_conversation(self, conversation_id: str) -> Conversation:
|
||||
"""Get a conversation with the given ID."""
|
||||
record = await self.sql_store.fetch_one(table="openai_conversations", where={"id": conversation_id})
|
||||
|
||||
if record is None:
|
||||
raise ValueError(f"Conversation {conversation_id} not found")
|
||||
|
||||
return Conversation(
|
||||
id=record["id"], created_at=record["created_at"], metadata=record.get("metadata"), object="conversation"
|
||||
)
|
||||
|
||||
async def update_conversation(self, conversation_id: str, metadata: Metadata) -> Conversation:
|
||||
"""Update a conversation's metadata with the given ID"""
|
||||
await self.sql_store.update(
|
||||
table="openai_conversations", data={"metadata": metadata}, where={"id": conversation_id}
|
||||
)
|
||||
|
||||
return await self.get_conversation(conversation_id)
|
||||
|
||||
async def openai_delete_conversation(self, conversation_id: str) -> ConversationDeletedResource:
|
||||
"""Delete a conversation with the given ID."""
|
||||
await self.sql_store.delete(table="openai_conversations", where={"id": conversation_id})
|
||||
|
||||
logger.debug(f"Deleted conversation {conversation_id}")
|
||||
return ConversationDeletedResource(id=conversation_id)
|
||||
|
||||
def _validate_conversation_id(self, conversation_id: str) -> None:
|
||||
"""Validate conversation ID format."""
|
||||
if not conversation_id.startswith("conv_"):
|
||||
raise ValueError(
|
||||
f"Invalid 'conversation_id': '{conversation_id}'. Expected an ID that begins with 'conv_'."
|
||||
)
|
||||
|
||||
def _get_or_generate_item_id(self, item: ConversationItem, item_dict: dict) -> str:
|
||||
"""Get existing item ID or generate one if missing."""
|
||||
if item.id is None:
|
||||
random_bytes = secrets.token_bytes(24)
|
||||
if item.type == "message":
|
||||
item_id = f"msg_{random_bytes.hex()}"
|
||||
else:
|
||||
item_id = f"item_{random_bytes.hex()}"
|
||||
item_dict["id"] = item_id
|
||||
return item_id
|
||||
return item.id
|
||||
|
||||
async def _get_validated_conversation(self, conversation_id: str) -> Conversation:
|
||||
"""Validate conversation ID and return the conversation if it exists."""
|
||||
self._validate_conversation_id(conversation_id)
|
||||
return await self.get_conversation(conversation_id)
|
||||
|
||||
async def add_items(self, conversation_id: str, items: list[ConversationItem]) -> ConversationItemList:
|
||||
"""Create (add) items to a conversation."""
|
||||
await self._get_validated_conversation(conversation_id)
|
||||
|
||||
created_items = []
|
||||
base_time = int(time.time())
|
||||
|
||||
for i, item in enumerate(items):
|
||||
item_dict = item.model_dump()
|
||||
item_id = self._get_or_generate_item_id(item, item_dict)
|
||||
|
||||
# make each timestamp unique to maintain order
|
||||
created_at = base_time + i
|
||||
|
||||
item_record = {
|
||||
"id": item_id,
|
||||
"conversation_id": conversation_id,
|
||||
"created_at": created_at,
|
||||
"item_data": item_dict,
|
||||
}
|
||||
|
||||
# TODO: Add support for upsert in sql_store, this will fail first if ID exists and then update
|
||||
try:
|
||||
await self.sql_store.insert(table="conversation_items", data=item_record)
|
||||
except Exception:
|
||||
# If insert fails due to ID conflict, update existing record
|
||||
await self.sql_store.update(
|
||||
table="conversation_items",
|
||||
data={"created_at": created_at, "item_data": item_dict},
|
||||
where={"id": item_id},
|
||||
)
|
||||
|
||||
created_items.append(item_dict)
|
||||
|
||||
logger.debug(f"Created {len(created_items)} items in conversation {conversation_id}")
|
||||
|
||||
# Convert created items (dicts) to proper ConversationItem types
|
||||
adapter: TypeAdapter[ConversationItem] = TypeAdapter(ConversationItem)
|
||||
response_items: list[ConversationItem] = [adapter.validate_python(item_dict) for item_dict in created_items]
|
||||
|
||||
return ConversationItemList(
|
||||
data=response_items,
|
||||
first_id=created_items[0]["id"] if created_items else None,
|
||||
last_id=created_items[-1]["id"] if created_items else None,
|
||||
has_more=False,
|
||||
)
|
||||
|
||||
async def retrieve(self, conversation_id: str, item_id: str) -> ConversationItem:
|
||||
"""Retrieve a conversation item."""
|
||||
if not conversation_id:
|
||||
raise ValueError(f"Expected a non-empty value for `conversation_id` but received {conversation_id!r}")
|
||||
if not item_id:
|
||||
raise ValueError(f"Expected a non-empty value for `item_id` but received {item_id!r}")
|
||||
|
||||
# Get item from conversation_items table
|
||||
record = await self.sql_store.fetch_one(
|
||||
table="conversation_items", where={"id": item_id, "conversation_id": conversation_id}
|
||||
)
|
||||
|
||||
if record is None:
|
||||
raise ValueError(f"Item {item_id} not found in conversation {conversation_id}")
|
||||
|
||||
adapter: TypeAdapter[ConversationItem] = TypeAdapter(ConversationItem)
|
||||
return adapter.validate_python(record["item_data"])
|
||||
|
||||
async def list_items(
|
||||
self,
|
||||
conversation_id: str,
|
||||
after: str | None = None,
|
||||
include: list[ConversationItemInclude] | None = None,
|
||||
limit: int | None = None,
|
||||
order: Literal["asc", "desc"] | None = None,
|
||||
) -> ConversationItemList:
|
||||
"""List items in the conversation."""
|
||||
if not conversation_id:
|
||||
raise ValueError(f"Expected a non-empty value for `conversation_id` but received {conversation_id!r}")
|
||||
|
||||
# check if conversation exists
|
||||
await self.get_conversation(conversation_id)
|
||||
|
||||
result = await self.sql_store.fetch_all(table="conversation_items", where={"conversation_id": conversation_id})
|
||||
records = result.data
|
||||
|
||||
if order is not None and order == "asc":
|
||||
records.sort(key=lambda x: x["created_at"])
|
||||
else:
|
||||
records.sort(key=lambda x: x["created_at"], reverse=True)
|
||||
|
||||
actual_limit = limit or 20
|
||||
|
||||
records = records[:actual_limit]
|
||||
items = [record["item_data"] for record in records]
|
||||
|
||||
adapter: TypeAdapter[ConversationItem] = TypeAdapter(ConversationItem)
|
||||
response_items: list[ConversationItem] = [adapter.validate_python(item) for item in items]
|
||||
|
||||
first_id = response_items[0].id if response_items else None
|
||||
last_id = response_items[-1].id if response_items else None
|
||||
|
||||
return ConversationItemList(
|
||||
data=response_items,
|
||||
first_id=first_id,
|
||||
last_id=last_id,
|
||||
has_more=False,
|
||||
)
|
||||
|
||||
async def openai_delete_conversation_item(
|
||||
self, conversation_id: str, item_id: str
|
||||
) -> ConversationItemDeletedResource:
|
||||
"""Delete a conversation item."""
|
||||
if not conversation_id:
|
||||
raise ValueError(f"Expected a non-empty value for `conversation_id` but received {conversation_id!r}")
|
||||
if not item_id:
|
||||
raise ValueError(f"Expected a non-empty value for `item_id` but received {item_id!r}")
|
||||
|
||||
_ = await self._get_validated_conversation(conversation_id)
|
||||
|
||||
record = await self.sql_store.fetch_one(
|
||||
table="conversation_items", where={"id": item_id, "conversation_id": conversation_id}
|
||||
)
|
||||
|
||||
if record is None:
|
||||
raise ValueError(f"Item {item_id} not found in conversation {conversation_id}")
|
||||
|
||||
await self.sql_store.delete(
|
||||
table="conversation_items", where={"id": item_id, "conversation_id": conversation_id}
|
||||
)
|
||||
|
||||
logger.debug(f"Deleted item {item_id} from conversation {conversation_id}")
|
||||
return ConversationItemDeletedResource(id=item_id)
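For reference, the item-id scheme used by `_get_or_generate_item_id` above can be reproduced with the standard library alone; this is a minimal sketch, and the `make_item_id` helper name is illustrative rather than part of the module:

```python
# 24 random bytes, hex-encoded, prefixed by the item type -- the same shape
# that add_items() stores when an incoming item has no id.
import secrets


def make_item_id(item_type: str) -> str:
    prefix = "msg" if item_type == "message" else "item"
    return f"{prefix}_{secrets.token_bytes(24).hex()}"


print(make_item_id("message"))  # e.g. msg_<48 hex characters>
```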
629
src/llama_stack/core/datatypes.py
Normal file
@@ -0,0 +1,629 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from enum import StrEnum
|
||||
from pathlib import Path
|
||||
from typing import Annotated, Any, Literal, Self
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from pydantic import BaseModel, Field, field_validator, model_validator
|
||||
|
||||
from llama_stack.apis.benchmarks import Benchmark, BenchmarkInput
|
||||
from llama_stack.apis.datasetio import DatasetIO
|
||||
from llama_stack.apis.datasets import Dataset, DatasetInput
|
||||
from llama_stack.apis.eval import Eval
|
||||
from llama_stack.apis.inference import Inference
|
||||
from llama_stack.apis.models import Model, ModelInput
|
||||
from llama_stack.apis.resource import Resource
|
||||
from llama_stack.apis.safety import Safety
|
||||
from llama_stack.apis.scoring import Scoring
|
||||
from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnInput
|
||||
from llama_stack.apis.shields import Shield, ShieldInput
|
||||
from llama_stack.apis.tools import ToolGroup, ToolGroupInput, ToolRuntime
|
||||
from llama_stack.apis.vector_io import VectorIO
|
||||
from llama_stack.apis.vector_stores import VectorStore, VectorStoreInput
|
||||
from llama_stack.core.access_control.datatypes import AccessRule
|
||||
from llama_stack.core.storage.datatypes import (
|
||||
KVStoreReference,
|
||||
StorageBackendType,
|
||||
StorageConfig,
|
||||
)
|
||||
from llama_stack.log import LoggingConfig
|
||||
from llama_stack.providers.datatypes import Api, ProviderSpec
|
||||
|
||||
LLAMA_STACK_BUILD_CONFIG_VERSION = 2
|
||||
LLAMA_STACK_RUN_CONFIG_VERSION = 2
|
||||
|
||||
|
||||
RoutingKey = str | list[str]
|
||||
|
||||
|
||||
class RegistryEntrySource(StrEnum):
|
||||
via_register_api = "via_register_api"
|
||||
listed_from_provider = "listed_from_provider"
|
||||
|
||||
|
||||
class User(BaseModel):
|
||||
principal: str
|
||||
# further attributes that may be used for access control decisions
|
||||
attributes: dict[str, list[str]] | None = None
|
||||
|
||||
def __init__(self, principal: str, attributes: dict[str, list[str]] | None):
|
||||
super().__init__(principal=principal, attributes=attributes)
|
||||
|
||||
|
||||
class ResourceWithOwner(Resource):
|
||||
"""Extension of Resource that adds an optional owner, i.e. the user that created the
|
||||
resource. This can be used to constrain access to the resource."""
|
||||
|
||||
owner: User | None = None
|
||||
source: RegistryEntrySource = RegistryEntrySource.via_register_api
|
||||
|
||||
|
||||
# Use the extended Resource for all routable objects
|
||||
class ModelWithOwner(Model, ResourceWithOwner):
|
||||
pass
|
||||
|
||||
|
||||
class ShieldWithOwner(Shield, ResourceWithOwner):
|
||||
pass
|
||||
|
||||
|
||||
class VectorStoreWithOwner(VectorStore, ResourceWithOwner):
|
||||
pass
|
||||
|
||||
|
||||
class DatasetWithOwner(Dataset, ResourceWithOwner):
|
||||
pass
|
||||
|
||||
|
||||
class ScoringFnWithOwner(ScoringFn, ResourceWithOwner):
|
||||
pass
|
||||
|
||||
|
||||
class BenchmarkWithOwner(Benchmark, ResourceWithOwner):
|
||||
pass
|
||||
|
||||
|
||||
class ToolGroupWithOwner(ToolGroup, ResourceWithOwner):
|
||||
pass
|
||||
|
||||
|
||||
RoutableObject = Model | Shield | VectorStore | Dataset | ScoringFn | Benchmark | ToolGroup
|
||||
|
||||
RoutableObjectWithProvider = Annotated[
|
||||
ModelWithOwner
|
||||
| ShieldWithOwner
|
||||
| VectorStoreWithOwner
|
||||
| DatasetWithOwner
|
||||
| ScoringFnWithOwner
|
||||
| BenchmarkWithOwner
|
||||
| ToolGroupWithOwner,
|
||||
Field(discriminator="type"),
|
||||
]
|
||||
|
||||
RoutedProtocol = Inference | Safety | VectorIO | DatasetIO | Scoring | Eval | ToolRuntime
|
||||
|
||||
|
||||
# Example: /inference, /safety
|
||||
class AutoRoutedProviderSpec(ProviderSpec):
|
||||
provider_type: str = "router"
|
||||
config_class: str = ""
|
||||
|
||||
container_image: str | None = None
|
||||
routing_table_api: Api
|
||||
module: str
|
||||
provider_data_validator: str | None = Field(
|
||||
default=None,
|
||||
)
|
||||
|
||||
|
||||
# Example: /models, /shields
|
||||
class RoutingTableProviderSpec(ProviderSpec):
|
||||
provider_type: str = "routing_table"
|
||||
config_class: str = ""
|
||||
container_image: str | None = None
|
||||
|
||||
router_api: Api
|
||||
module: str
|
||||
pip_packages: list[str] = Field(default_factory=list)
|
||||
|
||||
|
||||
class Provider(BaseModel):
|
||||
# provider_id of None means that the provider is not enabled - this happens
|
||||
# when the provider is enabled via a conditional environment variable
|
||||
provider_id: str | None
|
||||
provider_type: str
|
||||
config: dict[str, Any] = {}
|
||||
module: str | None = Field(
|
||||
default=None,
|
||||
description="""
|
||||
Fully-qualified name of the external provider module to import. The module is expected to have:
|
||||
|
||||
- `get_adapter_impl(config, deps)`: returns the adapter implementation
|
||||
|
||||
Example: `module: ramalama_stack`
|
||||
""",
|
||||
)
|
||||
|
||||
|
||||
class BuildProvider(BaseModel):
|
||||
provider_type: str
|
||||
module: str | None = Field(
|
||||
default=None,
|
||||
description="""
|
||||
Fully-qualified name of the external provider module to import. The module is expected to have:
|
||||
|
||||
- `get_adapter_impl(config, deps)`: returns the adapter implementation
|
||||
|
||||
Example: `module: ramalama_stack`
|
||||
""",
|
||||
)
|
||||
|
||||
|
||||
class DistributionSpec(BaseModel):
|
||||
description: str | None = Field(
|
||||
default="",
|
||||
description="Description of the distribution",
|
||||
)
|
||||
container_image: str | None = None
|
||||
providers: dict[str, list[BuildProvider]] = Field(
|
||||
default_factory=dict,
|
||||
description="""
|
||||
Provider Types for each of the APIs provided by this distribution. If you
|
||||
select multiple providers, you should provide an appropriate 'routing_map'
|
||||
in the runtime configuration to help route to the correct provider.
|
||||
""",
|
||||
)
|
||||
|
||||
|
||||
class TelemetryConfig(BaseModel):
|
||||
"""
|
||||
Configuration for telemetry.
|
||||
|
||||
Llama Stack uses OpenTelemetry for telemetry. Please refer to https://opentelemetry.io/docs/languages/sdk-configuration/
|
||||
for env variables to configure the OpenTelemetry SDK.
|
||||
|
||||
Example:
|
||||
```bash
|
||||
OTEL_SERVICE_NAME=llama-stack OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 uv run llama stack run starter
|
||||
```
|
||||
"""
|
||||
|
||||
enabled: bool = Field(default=False, description="enable or disable telemetry")
|
||||
|
||||
|
||||
class OAuth2JWKSConfig(BaseModel):
|
||||
# The JWKS URI for collecting public keys
|
||||
uri: str
|
||||
token: str | None = Field(default=None, description="token to authorise access to jwks")
|
||||
key_recheck_period: int = Field(default=3600, description="The period to recheck the JWKS URI for key updates")
|
||||
|
||||
|
||||
class OAuth2IntrospectionConfig(BaseModel):
|
||||
url: str
|
||||
client_id: str
|
||||
client_secret: str
|
||||
send_secret_in_body: bool = False
|
||||
|
||||
|
||||
class AuthProviderType(StrEnum):
|
||||
"""Supported authentication provider types."""
|
||||
|
||||
OAUTH2_TOKEN = "oauth2_token"
|
||||
GITHUB_TOKEN = "github_token"
|
||||
CUSTOM = "custom"
|
||||
KUBERNETES = "kubernetes"
|
||||
|
||||
|
||||
class OAuth2TokenAuthConfig(BaseModel):
|
||||
"""Configuration for OAuth2 token authentication."""
|
||||
|
||||
type: Literal[AuthProviderType.OAUTH2_TOKEN] = AuthProviderType.OAUTH2_TOKEN
|
||||
audience: str = Field(default="llama-stack")
|
||||
verify_tls: bool = Field(default=True)
|
||||
tls_cafile: Path | None = Field(default=None)
|
||||
issuer: str | None = Field(default=None, description="The OIDC issuer URL.")
|
||||
claims_mapping: dict[str, str] = Field(
|
||||
default_factory=lambda: {
|
||||
"sub": "roles",
|
||||
"username": "roles",
|
||||
"groups": "teams",
|
||||
"team": "teams",
|
||||
"project": "projects",
|
||||
"tenant": "namespaces",
|
||||
"namespace": "namespaces",
|
||||
},
|
||||
)
|
||||
jwks: OAuth2JWKSConfig | None = Field(default=None, description="JWKS configuration")
|
||||
introspection: OAuth2IntrospectionConfig | None = Field(
|
||||
default=None, description="OAuth2 introspection configuration"
|
||||
)
|
||||
|
||||
@field_validator("claims_mapping")
|
||||
@classmethod
|
||||
def validate_claims_mapping(cls, v):
|
||||
for key, value in v.items():
|
||||
if not value:
|
||||
raise ValueError(f"claims_mapping value cannot be empty: {key}")
|
||||
return v
|
||||
|
||||
@model_validator(mode="after")
|
||||
def validate_mode(self) -> Self:
|
||||
if not self.jwks and not self.introspection:
|
||||
raise ValueError("One of jwks or introspection must be configured")
|
||||
if self.jwks and self.introspection:
|
||||
raise ValueError("At present only one of jwks or introspection should be configured")
|
||||
return self
|
||||
|
||||
|
||||
class CustomAuthConfig(BaseModel):
|
||||
"""Configuration for custom authentication."""
|
||||
|
||||
type: Literal[AuthProviderType.CUSTOM] = AuthProviderType.CUSTOM
|
||||
endpoint: str = Field(
|
||||
...,
|
||||
description="Custom authentication endpoint URL",
|
||||
)
|
||||
|
||||
|
||||
class GitHubTokenAuthConfig(BaseModel):
|
||||
"""Configuration for GitHub token authentication."""
|
||||
|
||||
type: Literal[AuthProviderType.GITHUB_TOKEN] = AuthProviderType.GITHUB_TOKEN
|
||||
github_api_base_url: str = Field(
|
||||
default="https://api.github.com",
|
||||
description="Base URL for GitHub API (use https://api.github.com for public GitHub)",
|
||||
)
|
||||
claims_mapping: dict[str, str] = Field(
|
||||
default_factory=lambda: {
|
||||
"login": "roles",
|
||||
"organizations": "teams",
|
||||
},
|
||||
description="Mapping from GitHub user fields to access attributes",
|
||||
)
|
||||
|
||||
|
||||
class KubernetesAuthProviderConfig(BaseModel):
|
||||
"""Configuration for Kubernetes authentication provider."""
|
||||
|
||||
type: Literal[AuthProviderType.KUBERNETES] = AuthProviderType.KUBERNETES
|
||||
api_server_url: str = Field(
|
||||
default="https://kubernetes.default.svc",
|
||||
description="Kubernetes API server URL (e.g., https://api.cluster.domain:6443)",
|
||||
)
|
||||
verify_tls: bool = Field(default=True, description="Whether to verify TLS certificates")
|
||||
tls_cafile: Path | None = Field(default=None, description="Path to CA certificate file for TLS verification")
|
||||
claims_mapping: dict[str, str] = Field(
|
||||
default_factory=lambda: {
|
||||
"username": "roles",
|
||||
"groups": "roles",
|
||||
},
|
||||
description="Mapping of Kubernetes user claims to access attributes",
|
||||
)
|
||||
|
||||
@field_validator("api_server_url")
|
||||
@classmethod
|
||||
def validate_api_server_url(cls, v):
|
||||
parsed = urlparse(v)
|
||||
if not parsed.scheme or not parsed.netloc:
|
||||
raise ValueError(f"api_server_url must be a valid URL with scheme and host: {v}")
|
||||
if parsed.scheme not in ["http", "https"]:
|
||||
raise ValueError(f"api_server_url scheme must be http or https: {v}")
|
||||
return v
|
||||
|
||||
@field_validator("claims_mapping")
|
||||
@classmethod
|
||||
def validate_claims_mapping(cls, v):
|
||||
for key, value in v.items():
|
||||
if not value:
|
||||
raise ValueError(f"claims_mapping value cannot be empty: {key}")
|
||||
return v
|
||||
|
||||
|
||||
AuthProviderConfig = Annotated[
|
||||
OAuth2TokenAuthConfig | GitHubTokenAuthConfig | CustomAuthConfig | KubernetesAuthProviderConfig,
|
||||
Field(discriminator="type"),
|
||||
]
|
||||
|
||||
|
||||
class AuthenticationConfig(BaseModel):
|
||||
"""Top-level authentication configuration."""
|
||||
|
||||
provider_config: AuthProviderConfig = Field(
|
||||
...,
|
||||
description="Authentication provider configuration",
|
||||
)
|
||||
access_policy: list[AccessRule] = Field(
|
||||
default=[],
|
||||
description="Rules for determining access to resources",
|
||||
)
|
||||
|
||||
|
||||
class AuthenticationRequiredError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class QualifiedModel(BaseModel):
|
||||
"""A qualified model identifier, consisting of a provider ID and a model ID."""
|
||||
|
||||
provider_id: str
|
||||
model_id: str
|
||||
|
||||
|
||||
class VectorStoresConfig(BaseModel):
|
||||
"""Configuration for vector stores in the stack."""
|
||||
|
||||
default_provider_id: str | None = Field(
|
||||
default=None,
|
||||
description="ID of the vector_io provider to use as default when multiple providers are available and none is specified.",
|
||||
)
|
||||
default_embedding_model: QualifiedModel | None = Field(
|
||||
default=None,
|
||||
description="Default embedding model configuration for vector stores.",
|
||||
)
|
||||
|
||||
|
||||
class SafetyConfig(BaseModel):
|
||||
"""Configuration for default moderations model."""
|
||||
|
||||
default_shield_id: str | None = Field(
|
||||
default=None,
|
||||
description="ID of the shield to use for when `model` is not specified in the `moderations` API request.",
|
||||
)
|
||||
|
||||
|
||||
class QuotaPeriod(StrEnum):
|
||||
DAY = "day"
|
||||
|
||||
|
||||
class QuotaConfig(BaseModel):
|
||||
kvstore: KVStoreReference = Field(description="Config for KV store backend (SQLite only for now)")
|
||||
anonymous_max_requests: int = Field(default=100, description="Max requests for unauthenticated clients per period")
|
||||
authenticated_max_requests: int = Field(
|
||||
default=1000, description="Max requests for authenticated clients per period"
|
||||
)
|
||||
period: QuotaPeriod = Field(default=QuotaPeriod.DAY, description="Quota period to set")
|
||||
|
||||
|
||||
class CORSConfig(BaseModel):
|
||||
allow_origins: list[str] = Field(default_factory=list)
|
||||
allow_origin_regex: str | None = Field(default=None)
|
||||
allow_methods: list[str] = Field(default=["OPTIONS"])
|
||||
allow_headers: list[str] = Field(default_factory=list)
|
||||
allow_credentials: bool = Field(default=False)
|
||||
expose_headers: list[str] = Field(default_factory=list)
|
||||
max_age: int = Field(default=600, ge=0)
|
||||
|
||||
@model_validator(mode="after")
|
||||
def validate_credentials_config(self) -> Self:
|
||||
if self.allow_credentials and (self.allow_origins == ["*"] or "*" in self.allow_origins):
|
||||
raise ValueError("Cannot use wildcard origins with credentials enabled")
|
||||
return self
|
||||
|
||||
|
||||
def process_cors_config(cors_config: bool | CORSConfig | None) -> CORSConfig | None:
|
||||
if cors_config is False or cors_config is None:
|
||||
return None
|
||||
|
||||
if cors_config is True:
|
||||
# dev mode: allow localhost on any port
|
||||
return CORSConfig(
|
||||
allow_origins=[],
|
||||
allow_origin_regex=r"https?://localhost:\d+",
|
||||
allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"],
|
||||
allow_headers=["Content-Type", "Authorization", "X-Requested-With"],
|
||||
)
|
||||
|
||||
if isinstance(cors_config, CORSConfig):
|
||||
return cors_config
|
||||
|
||||
raise ValueError(f"Expected bool or CORSConfig, got {type(cors_config).__name__}")
|
||||
|
||||
|
||||
class RegisteredResources(BaseModel):
|
||||
"""Registry of resources available in the distribution."""
|
||||
|
||||
models: list[ModelInput] = Field(default_factory=list)
|
||||
shields: list[ShieldInput] = Field(default_factory=list)
|
||||
vector_stores: list[VectorStoreInput] = Field(default_factory=list)
|
||||
datasets: list[DatasetInput] = Field(default_factory=list)
|
||||
scoring_fns: list[ScoringFnInput] = Field(default_factory=list)
|
||||
benchmarks: list[BenchmarkInput] = Field(default_factory=list)
|
||||
tool_groups: list[ToolGroupInput] = Field(default_factory=list)
|
||||
|
||||
|
||||
class ServerConfig(BaseModel):
|
||||
port: int = Field(
|
||||
default=8321,
|
||||
description="Port to listen on",
|
||||
ge=1024,
|
||||
le=65535,
|
||||
)
|
||||
tls_certfile: str | None = Field(
|
||||
default=None,
|
||||
description="Path to TLS certificate file for HTTPS",
|
||||
)
|
||||
tls_keyfile: str | None = Field(
|
||||
default=None,
|
||||
description="Path to TLS key file for HTTPS",
|
||||
)
|
||||
tls_cafile: str | None = Field(
|
||||
default=None,
|
||||
description="Path to TLS CA file for HTTPS with mutual TLS authentication",
|
||||
)
|
||||
auth: AuthenticationConfig | None = Field(
|
||||
default=None,
|
||||
description="Authentication configuration for the server",
|
||||
)
|
||||
host: str | None = Field(
|
||||
default=None,
|
||||
description="The host the server should listen on",
|
||||
)
|
||||
quota: QuotaConfig | None = Field(
|
||||
default=None,
|
||||
description="Per client quota request configuration",
|
||||
)
|
||||
cors: bool | CORSConfig | None = Field(
|
||||
default=None,
|
||||
description="CORS configuration for cross-origin requests. Can be:\n"
|
||||
"- true: Enable localhost CORS for development\n"
|
||||
"- {allow_origins: [...], allow_methods: [...], ...}: Full configuration",
|
||||
)
|
||||
|
||||
|
||||
class StackRunConfig(BaseModel):
|
||||
version: int = LLAMA_STACK_RUN_CONFIG_VERSION
|
||||
|
||||
image_name: str = Field(
|
||||
...,
|
||||
description="""
|
||||
Reference to the distribution this package refers to. For unregistered (adhoc) packages,
|
||||
this could be just a hash
|
||||
""",
|
||||
)
|
||||
container_image: str | None = Field(
|
||||
default=None,
|
||||
description="Reference to the container image if this package refers to a container",
|
||||
)
|
||||
apis: list[str] = Field(
|
||||
default_factory=list,
|
||||
description="""
|
||||
The list of APIs to serve. If not specified, all APIs specified in the provider_map will be served""",
|
||||
)
|
||||
|
||||
providers: dict[str, list[Provider]] = Field(
|
||||
description="""
|
||||
One or more providers to use for each API. The same provider_type (e.g., meta-reference)
|
||||
can be instantiated multiple times (with different configs) if necessary.
|
||||
""",
|
||||
)
|
||||
storage: StorageConfig = Field(
|
||||
description="Catalog of named storage backends and references available to the stack",
|
||||
)
|
||||
|
||||
registered_resources: RegisteredResources = Field(
|
||||
default_factory=RegisteredResources,
|
||||
description="Registry of resources available in the distribution",
|
||||
)
|
||||
|
||||
logging: LoggingConfig | None = Field(default=None, description="Configuration for Llama Stack Logging")
|
||||
|
||||
telemetry: TelemetryConfig = Field(default_factory=TelemetryConfig, description="Configuration for telemetry")
|
||||
|
||||
server: ServerConfig = Field(
|
||||
default_factory=ServerConfig,
|
||||
description="Configuration for the HTTP(S) server",
|
||||
)
|
||||
|
||||
external_providers_dir: Path | None = Field(
|
||||
default=None,
|
||||
description="Path to directory containing external provider implementations. The providers code and dependencies must be installed on the system.",
|
||||
)
|
||||
|
||||
external_apis_dir: Path | None = Field(
|
||||
default=None,
|
||||
description="Path to directory containing external API implementations. The APIs code and dependencies must be installed on the system.",
|
||||
)
|
||||
|
||||
vector_stores: VectorStoresConfig | None = Field(
|
||||
default=None,
|
||||
description="Configuration for vector stores, including default embedding model",
|
||||
)
|
||||
|
||||
safety: SafetyConfig | None = Field(
|
||||
default=None,
|
||||
description="Configuration for default moderations model",
|
||||
)
|
||||
|
||||
@field_validator("external_providers_dir")
|
||||
@classmethod
|
||||
def validate_external_providers_dir(cls, v):
|
||||
if v is None:
|
||||
return None
|
||||
if isinstance(v, str):
|
||||
return Path(v)
|
||||
return v
|
||||
|
||||
@model_validator(mode="after")
|
||||
def validate_server_stores(self) -> "StackRunConfig":
|
||||
backend_map = self.storage.backends
|
||||
stores = self.storage.stores
|
||||
kv_backends = {
|
||||
name
|
||||
for name, cfg in backend_map.items()
|
||||
if cfg.type
|
||||
in {
|
||||
StorageBackendType.KV_REDIS,
|
||||
StorageBackendType.KV_SQLITE,
|
||||
StorageBackendType.KV_POSTGRES,
|
||||
StorageBackendType.KV_MONGODB,
|
||||
}
|
||||
}
|
||||
sql_backends = {
|
||||
name
|
||||
for name, cfg in backend_map.items()
|
||||
if cfg.type in {StorageBackendType.SQL_SQLITE, StorageBackendType.SQL_POSTGRES}
|
||||
}
|
||||
|
||||
def _ensure_backend(reference, expected_set, store_name: str) -> None:
|
||||
if reference is None:
|
||||
return
|
||||
backend_name = reference.backend
|
||||
if backend_name not in backend_map:
|
||||
raise ValueError(
|
||||
f"{store_name} references unknown backend '{backend_name}'. "
|
||||
f"Available backends: {sorted(backend_map)}"
|
||||
)
|
||||
if backend_name not in expected_set:
|
||||
raise ValueError(
|
||||
f"{store_name} references backend '{backend_name}' of type "
|
||||
f"'{backend_map[backend_name].type.value}', but a backend of type "
|
||||
f"{'kv_*' if expected_set is kv_backends else 'sql_*'} is required."
|
||||
)
|
||||
|
||||
_ensure_backend(stores.metadata, kv_backends, "storage.stores.metadata")
|
||||
_ensure_backend(stores.inference, sql_backends, "storage.stores.inference")
|
||||
_ensure_backend(stores.conversations, sql_backends, "storage.stores.conversations")
|
||||
_ensure_backend(stores.responses, sql_backends, "storage.stores.responses")
|
||||
_ensure_backend(stores.prompts, kv_backends, "storage.stores.prompts")
|
||||
return self
|
||||
|
||||
|
||||
class BuildConfig(BaseModel):
|
||||
version: int = LLAMA_STACK_BUILD_CONFIG_VERSION
|
||||
|
||||
distribution_spec: DistributionSpec = Field(description="The distribution spec to build including API providers. ")
|
||||
image_type: str = Field(
|
||||
default="venv",
|
||||
description="Type of package to build (container | venv)",
|
||||
)
|
||||
image_name: str | None = Field(
|
||||
default=None,
|
||||
description="Name of the distribution to build",
|
||||
)
|
||||
external_providers_dir: Path | None = Field(
|
||||
default=None,
|
||||
description="Path to directory containing external provider implementations. The providers packages will be resolved from this directory. "
|
||||
"pip_packages MUST contain the provider package name.",
|
||||
)
|
||||
additional_pip_packages: list[str] = Field(
|
||||
default_factory=list,
|
||||
description="Additional pip packages to install in the distribution. These packages will be installed in the distribution environment.",
|
||||
)
|
||||
external_apis_dir: Path | None = Field(
|
||||
default=None,
|
||||
description="Path to directory containing external API implementations. The APIs code and dependencies must be installed on the system.",
|
||||
)
|
||||
|
||||
@field_validator("external_providers_dir")
|
||||
@classmethod
|
||||
def validate_external_providers_dir(cls, v):
|
||||
if v is None:
|
||||
return None
|
||||
if isinstance(v, str):
|
||||
return Path(v)
|
||||
return v
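The CORS handling defined above is self-contained, so its behavior can be sketched directly; this assumes an installed llama-stack build that includes this change, so that `llama_stack.core.datatypes` is importable:

```python
from llama_stack.core.datatypes import CORSConfig, process_cors_config

# `cors: true` in the run config expands to a localhost-only development policy.
dev = process_cors_config(True)
print(dev.allow_origin_regex)  # https?://localhost:\d+

# Explicit configs pass through unchanged.
explicit = process_cors_config(CORSConfig(allow_origins=["https://app.example.com"]))
print(explicit.allow_origins)

# Wildcard origins combined with credentials are rejected by the model validator.
try:
    CORSConfig(allow_origins=["*"], allow_credentials=True)
except ValueError as err:
    print(f"rejected as expected: {err}")
```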
276
src/llama_stack/core/distribution.py
Normal file
@@ -0,0 +1,276 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import glob
|
||||
import importlib
|
||||
import os
|
||||
from typing import Any
|
||||
|
||||
import yaml
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.core.datatypes import BuildConfig, DistributionSpec
|
||||
from llama_stack.core.external import load_external_apis
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.datatypes import (
|
||||
Api,
|
||||
InlineProviderSpec,
|
||||
ProviderSpec,
|
||||
RemoteProviderSpec,
|
||||
)
|
||||
|
||||
logger = get_logger(name=__name__, category="core")
|
||||
|
||||
|
||||
INTERNAL_APIS = {Api.inspect, Api.providers, Api.prompts, Api.conversations}
|
||||
|
||||
|
||||
def stack_apis() -> list[Api]:
|
||||
return list(Api)
|
||||
|
||||
|
||||
class AutoRoutedApiInfo(BaseModel):
|
||||
routing_table_api: Api
|
||||
router_api: Api
|
||||
|
||||
|
||||
def builtin_automatically_routed_apis() -> list[AutoRoutedApiInfo]:
|
||||
return [
|
||||
AutoRoutedApiInfo(
|
||||
routing_table_api=Api.models,
|
||||
router_api=Api.inference,
|
||||
),
|
||||
AutoRoutedApiInfo(
|
||||
routing_table_api=Api.shields,
|
||||
router_api=Api.safety,
|
||||
),
|
||||
AutoRoutedApiInfo(
|
||||
routing_table_api=Api.datasets,
|
||||
router_api=Api.datasetio,
|
||||
),
|
||||
AutoRoutedApiInfo(
|
||||
routing_table_api=Api.scoring_functions,
|
||||
router_api=Api.scoring,
|
||||
),
|
||||
AutoRoutedApiInfo(
|
||||
routing_table_api=Api.benchmarks,
|
||||
router_api=Api.eval,
|
||||
),
|
||||
AutoRoutedApiInfo(
|
||||
routing_table_api=Api.tool_groups,
|
||||
router_api=Api.tool_runtime,
|
||||
),
|
||||
AutoRoutedApiInfo(
|
||||
routing_table_api=Api.vector_stores,
|
||||
router_api=Api.vector_io,
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
def providable_apis() -> list[Api]:
|
||||
routing_table_apis = {x.routing_table_api for x in builtin_automatically_routed_apis()}
|
||||
return [api for api in Api if api not in routing_table_apis and api not in INTERNAL_APIS]
|
||||
|
||||
|
||||
def _load_remote_provider_spec(spec_data: dict[str, Any], api: Api) -> ProviderSpec:
|
||||
spec = RemoteProviderSpec(api=api, provider_type=f"remote::{spec_data['adapter_type']}", **spec_data)
|
||||
return spec
|
||||
|
||||
|
||||
def _load_inline_provider_spec(spec_data: dict[str, Any], api: Api, provider_name: str) -> ProviderSpec:
|
||||
spec = InlineProviderSpec(api=api, provider_type=f"inline::{provider_name}", **spec_data)
|
||||
return spec
|
||||
|
||||
|
||||
def get_provider_registry(config=None) -> dict[Api, dict[str, ProviderSpec]]:
|
||||
"""Get the provider registry, optionally including external providers.
|
||||
|
||||
This function loads both built-in providers and external providers from YAML files or from their provided modules.
|
||||
External providers are loaded from a directory structure like:
|
||||
|
||||
providers.d/
|
||||
remote/
|
||||
inference/
|
||||
custom_ollama.yaml
|
||||
vllm.yaml
|
||||
vector_io/
|
||||
qdrant.yaml
|
||||
safety/
|
||||
llama-guard.yaml
|
||||
inline/
|
||||
inference/
|
||||
custom_ollama.yaml
|
||||
vllm.yaml
|
||||
vector_io/
|
||||
qdrant.yaml
|
||||
safety/
|
||||
llama-guard.yaml
|
||||
|
||||
This function is called from a variety of places: during build, during run, and during stack construction.
|
||||
So when building external providers from a module, there are scenarios where the pip package required to import the module might not be available yet.
|
||||
There is special handling for all of the potential cases this method can be called from.
|
||||
|
||||
Args:
|
||||
config: Optional object containing the external providers directory path
|
||||
building: Optional bool delineating whether or not this is being called from a build process
|
||||
|
||||
Returns:
|
||||
A dictionary mapping APIs to their available providers
|
||||
|
||||
Raises:
|
||||
FileNotFoundError: If the external providers directory doesn't exist
|
||||
ValueError: If any provider spec is invalid
|
||||
"""
|
||||
|
||||
registry: dict[Api, dict[str, ProviderSpec]] = {}
|
||||
for api in providable_apis():
|
||||
name = api.name.lower()
|
||||
logger.debug(f"Importing module {name}")
|
||||
try:
|
||||
module = importlib.import_module(f"llama_stack.providers.registry.{name}")
|
||||
registry[api] = {a.provider_type: a for a in module.available_providers()}
|
||||
except ImportError as e:
|
||||
logger.warning(f"Failed to import module {name}: {e}")
|
||||
|
||||
# Refresh providable APIs with external APIs if any
|
||||
external_apis = load_external_apis(config)
|
||||
for api, api_spec in external_apis.items():
|
||||
name = api_spec.name.lower()
|
||||
logger.info(f"Importing external API {name} module {api_spec.module}")
|
||||
try:
|
||||
module = importlib.import_module(api_spec.module)
|
||||
registry[api] = {a.provider_type: a for a in module.available_providers()}
|
||||
except (ImportError, AttributeError) as e:
|
||||
# Populate the registry with an empty dict to avoid breaking the provider registry
|
||||
# This assumes that the in-tree provider(s) are not available for this API, which means
|
||||
# that users will need to use external providers for this API.
|
||||
registry[api] = {}
|
||||
logger.error(
|
||||
f"Failed to import external API {name}: {e}. Could not populate the in-tree provider(s) registry for {api.name}. \n"
|
||||
"Install the API package to load any in-tree providers for this API."
|
||||
)
|
||||
|
||||
# Check if config has external providers
|
||||
if config:
|
||||
if hasattr(config, "external_providers_dir") and config.external_providers_dir:
|
||||
registry = get_external_providers_from_dir(registry, config)
|
||||
# also check each provider entry for an external provider specified via module
|
||||
registry = get_external_providers_from_module(
|
||||
registry=registry,
|
||||
config=config,
|
||||
building=(isinstance(config, BuildConfig) or isinstance(config, DistributionSpec)),
|
||||
)
|
||||
|
||||
return registry
|
||||
|
||||
|
||||
def get_external_providers_from_dir(
|
||||
registry: dict[Api, dict[str, ProviderSpec]], config
|
||||
) -> dict[Api, dict[str, ProviderSpec]]:
|
||||
logger.warning(
|
||||
"Specifying external providers via `external_providers_dir` is being deprecated. Please specify `module:` in the provider instead."
|
||||
)
|
||||
external_providers_dir = os.path.abspath(os.path.expanduser(config.external_providers_dir))
|
||||
if not os.path.exists(external_providers_dir):
|
||||
raise FileNotFoundError(f"External providers directory not found: {external_providers_dir}")
|
||||
logger.info(f"Loading external providers from {external_providers_dir}")
|
||||
|
||||
for api in providable_apis():
|
||||
api_name = api.name.lower()
|
||||
|
||||
# Process both remote and inline providers
|
||||
for provider_type in ["remote", "inline"]:
|
||||
api_dir = os.path.join(external_providers_dir, provider_type, api_name)
|
||||
if not os.path.exists(api_dir):
|
||||
logger.debug(f"No {provider_type} provider directory found for {api_name}")
|
||||
continue
|
||||
|
||||
# Look for provider spec files in the API directory
|
||||
for spec_path in glob.glob(os.path.join(api_dir, "*.yaml")):
|
||||
provider_name = os.path.splitext(os.path.basename(spec_path))[0]
|
||||
logger.info(f"Loading {provider_type} provider spec from {spec_path}")
|
||||
|
||||
try:
|
||||
with open(spec_path) as f:
|
||||
spec_data = yaml.safe_load(f)
|
||||
|
||||
if provider_type == "remote":
|
||||
spec = _load_remote_provider_spec(spec_data, api)
|
||||
provider_type_key = f"remote::{provider_name}"
|
||||
else:
|
||||
spec = _load_inline_provider_spec(spec_data, api, provider_name)
|
||||
provider_type_key = f"inline::{provider_name}"
|
||||
|
||||
logger.info(f"Loaded {provider_type} provider spec for {provider_type_key} from {spec_path}")
|
||||
if provider_type_key in registry[api]:
|
||||
logger.warning(f"Overriding already registered provider {provider_type_key} for {api.name}")
|
||||
registry[api][provider_type_key] = spec
|
||||
logger.info(f"Successfully loaded external provider {provider_type_key}")
|
||||
except yaml.YAMLError as yaml_err:
|
||||
logger.error(f"Failed to parse YAML file {spec_path}: {yaml_err}")
|
||||
raise yaml_err
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to load provider spec from {spec_path}: {e}")
|
||||
raise e
|
||||
|
||||
return registry
|
||||
|
||||
|
||||
def get_external_providers_from_module(
|
||||
registry: dict[Api, dict[str, ProviderSpec]], config, building: bool
|
||||
) -> dict[Api, dict[str, ProviderSpec]]:
|
||||
provider_list = None
|
||||
if isinstance(config, BuildConfig):
|
||||
provider_list = config.distribution_spec.providers.items()
|
||||
else:
|
||||
provider_list = config.providers.items()
|
||||
if provider_list is None:
|
||||
logger.warning("Could not get list of providers from config")
|
||||
return registry
|
||||
for provider_api, providers in provider_list:
|
||||
for provider in providers:
|
||||
if not hasattr(provider, "module") or provider.module is None:
|
||||
continue
|
||||
# get provider using module
|
||||
try:
|
||||
if not building:
|
||||
package_name = provider.module.split("==")[0]
|
||||
module = importlib.import_module(f"{package_name}.provider")
|
||||
# if the config class is wrong, this raises an error saying the module could not be imported
|
||||
spec = module.get_provider_spec()
|
||||
else:
|
||||
# pass in a partially filled provider spec to satisfy the registry; it will be overwritten later at build and run time
|
||||
# when building, this module cannot be imported because it has not been installed yet
|
||||
spec = ProviderSpec(
|
||||
api=Api(provider_api),
|
||||
provider_type=provider.provider_type,
|
||||
is_external=True,
|
||||
module=provider.module,
|
||||
config_class="",
|
||||
)
|
||||
provider_type = provider.provider_type
|
||||
if isinstance(spec, list):
|
||||
# optionally allow people to pass inline and remote provider specs as a returned list.
|
||||
# with the old method, users could pass in directories of specs using overlapping code
|
||||
# we want to ensure we preserve that flexibility in this method.
|
||||
logger.info(
|
||||
f"Detected a list of external provider specs from {provider.module} adding all to the registry"
|
||||
)
|
||||
for provider_spec in spec:
|
||||
if provider_spec.provider_type != provider.provider_type:
|
||||
continue
|
||||
logger.info(f"Adding {provider.provider_type} to registry")
|
||||
registry[Api(provider_api)][provider.provider_type] = provider_spec
|
||||
else:
|
||||
registry[Api(provider_api)][provider_type] = spec
|
||||
except ModuleNotFoundError as exc:
|
||||
raise ValueError(
|
||||
"get_provider_spec not found. If specifying an external provider via `module` in the Provider spec, the Provider must have the `provider.get_provider_spec` module available"
|
||||
) from exc
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to load provider spec from module {provider.module}: {e}")
|
||||
raise e
|
||||
return registry
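As a quick sanity check of the registry assembly above, the built-in portion can be inspected without any run config; a minimal sketch, again assuming this change is installed:

```python
from llama_stack.core.distribution import get_provider_registry, providable_apis

registry = get_provider_registry()  # config=None: built-in providers only
for api in providable_apis():
    provider_types = sorted(registry.get(api, {}))
    print(f"{api.name}: {len(provider_types)} provider(s)", provider_types[:3])
```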
54
src/llama_stack/core/external.py
Normal file
@@ -0,0 +1,54 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
|
||||
import yaml
|
||||
|
||||
from llama_stack.apis.datatypes import Api, ExternalApiSpec
|
||||
from llama_stack.core.datatypes import BuildConfig, StackRunConfig
|
||||
from llama_stack.log import get_logger
|
||||
|
||||
logger = get_logger(name=__name__, category="core")
|
||||
|
||||
|
||||
def load_external_apis(config: StackRunConfig | BuildConfig | None) -> dict[Api, ExternalApiSpec]:
|
||||
"""Load external API specifications from the configured directory.
|
||||
|
||||
Args:
|
||||
config: StackRunConfig or BuildConfig containing the external APIs directory path
|
||||
|
||||
Returns:
|
||||
A dictionary mapping API names to their specifications
|
||||
"""
|
||||
if not config or not config.external_apis_dir:
|
||||
return {}
|
||||
|
||||
external_apis_dir = config.external_apis_dir.expanduser().resolve()
|
||||
if not external_apis_dir.is_dir():
|
||||
logger.error(f"External APIs directory is not a directory: {external_apis_dir}")
|
||||
return {}
|
||||
|
||||
logger.info(f"Loading external APIs from {external_apis_dir}")
|
||||
external_apis: dict[Api, ExternalApiSpec] = {}
|
||||
|
||||
# Look for YAML files in the external APIs directory
|
||||
for yaml_path in external_apis_dir.glob("*.yaml"):
|
||||
try:
|
||||
with open(yaml_path) as f:
|
||||
spec_data = yaml.safe_load(f)
|
||||
|
||||
spec = ExternalApiSpec(**spec_data)
|
||||
api = Api.add(spec.name)
|
||||
logger.info(f"Loaded external API spec for {spec.name} from {yaml_path}")
|
||||
external_apis[api] = spec
|
||||
except yaml.YAMLError as yaml_err:
|
||||
logger.error(f"Failed to parse YAML file {yaml_path}: {yaml_err}")
|
||||
raise
|
||||
except Exception:
|
||||
logger.exception(f"Failed to load external API spec from {yaml_path}")
|
||||
raise
|
||||
|
||||
return external_apis
42
src/llama_stack/core/id_generation.py
Normal file
@@ -0,0 +1,42 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from collections.abc import Callable
|
||||
|
||||
IdFactory = Callable[[], str]
|
||||
IdOverride = Callable[[str, IdFactory], str]
|
||||
|
||||
_id_override: IdOverride | None = None
|
||||
|
||||
|
||||
def generate_object_id(kind: str, factory: IdFactory) -> str:
|
||||
"""Generate an identifier for the given kind using the provided factory.
|
||||
|
||||
Allows tests to override ID generation deterministically by installing an
|
||||
override callback via :func:`set_id_override`.
|
||||
"""
|
||||
|
||||
override = _id_override
|
||||
if override is not None:
|
||||
return override(kind, factory)
|
||||
return factory()
|
||||
|
||||
|
||||
def set_id_override(override: IdOverride) -> IdOverride | None:
|
||||
"""Install an override used to generate deterministic identifiers."""
|
||||
|
||||
global _id_override
|
||||
|
||||
previous = _id_override
|
||||
_id_override = override
|
||||
return previous
|
||||
|
||||
|
||||
def reset_id_override(previous: IdOverride | None) -> None:
|
||||
"""Restore the previous override returned by :func:`set_id_override`."""
|
||||
|
||||
global _id_override
|
||||
_id_override = previous
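The override hooks above are primarily aimed at tests that need deterministic identifiers; a small usage sketch (the factory and the fixed id below are illustrative):

```python
import secrets

from llama_stack.core.id_generation import (
    generate_object_id,
    reset_id_override,
    set_id_override,
)


def conversation_id_factory() -> str:
    return f"conv_{secrets.token_bytes(16).hex()}"


# Install an override that ignores the factory and returns a stable id per kind.
previous = set_id_override(lambda kind, factory: f"{kind}_test_0001")
try:
    assert generate_object_id("conv", conversation_id_factory) == "conv_test_0001"
finally:
    reset_id_override(previous)

# Without an override installed, the factory result is returned unchanged.
print(generate_object_id("conv", conversation_id_factory))
```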
86
src/llama_stack/core/inspect.py
Normal file
@@ -0,0 +1,86 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from importlib.metadata import version
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.apis.inspect import (
|
||||
HealthInfo,
|
||||
Inspect,
|
||||
ListRoutesResponse,
|
||||
RouteInfo,
|
||||
VersionInfo,
|
||||
)
|
||||
from llama_stack.core.datatypes import StackRunConfig
|
||||
from llama_stack.core.external import load_external_apis
|
||||
from llama_stack.core.server.routes import get_all_api_routes
|
||||
from llama_stack.providers.datatypes import HealthStatus
|
||||
|
||||
|
||||
class DistributionInspectConfig(BaseModel):
|
||||
run_config: StackRunConfig
|
||||
|
||||
|
||||
async def get_provider_impl(config, deps):
|
||||
impl = DistributionInspectImpl(config, deps)
|
||||
await impl.initialize()
|
||||
return impl
|
||||
|
||||
|
||||
class DistributionInspectImpl(Inspect):
|
||||
def __init__(self, config: DistributionInspectConfig, deps):
|
||||
self.config = config
|
||||
self.deps = deps
|
||||
|
||||
async def initialize(self) -> None:
|
||||
pass
|
||||
|
||||
async def list_routes(self) -> ListRoutesResponse:
|
||||
run_config: StackRunConfig = self.config.run_config
|
||||
|
||||
ret = []
|
||||
external_apis = load_external_apis(run_config)
|
||||
all_endpoints = get_all_api_routes(external_apis)
|
||||
for api, endpoints in all_endpoints.items():
|
||||
# Always include provider and inspect APIs, filter others based on run config
|
||||
if api.value in ["providers", "inspect"]:
|
||||
ret.extend(
|
||||
[
|
||||
RouteInfo(
|
||||
route=e.path,
|
||||
method=next(iter([m for m in e.methods if m != "HEAD"])),
|
||||
provider_types=[], # These APIs don't have "real" providers - they're internal to the stack
|
||||
)
|
||||
for e, _ in endpoints
|
||||
if e.methods is not None
|
||||
]
|
||||
)
|
||||
else:
|
||||
providers = run_config.providers.get(api.value, [])
|
||||
if providers: # Only process if there are providers for this API
|
||||
ret.extend(
|
||||
[
|
||||
RouteInfo(
|
||||
route=e.path,
|
||||
method=next(iter([m for m in e.methods if m != "HEAD"])),
|
||||
provider_types=[p.provider_type for p in providers],
|
||||
)
|
||||
for e, _ in endpoints
|
||||
if e.methods is not None
|
||||
]
|
||||
)
|
||||
|
||||
return ListRoutesResponse(data=ret)
|
||||
|
||||
async def health(self) -> HealthInfo:
|
||||
return HealthInfo(status=HealthStatus.OK)
|
||||
|
||||
async def version(self) -> VersionInfo:
|
||||
return VersionInfo(version=version("llama-stack"))
|
||||
|
||||
async def shutdown(self) -> None:
|
||||
pass
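A minimal sketch of driving the Inspect implementation above; constructing a full `StackRunConfig` is elided, so `run_config` is assumed to come from an already-parsed run configuration:

```python
from llama_stack.core.inspect import DistributionInspectConfig, DistributionInspectImpl


async def show_stack_info(run_config) -> None:
    impl = DistributionInspectImpl(DistributionInspectConfig(run_config=run_config), deps={})
    await impl.initialize()
    print((await impl.version()).version)  # llama-stack package version
    print((await impl.health()).status)    # HealthStatus.OK
    routes = await impl.list_routes()
    print(f"{len(routes.data)} routes exposed")

# To run: asyncio.run(show_stack_info(run_config)) with a parsed StackRunConfig.
```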
540
src/llama_stack/core/library_client.py
Normal file
@@ -0,0 +1,540 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import asyncio
|
||||
import inspect
|
||||
import json
|
||||
import logging # allow-direct-logging
|
||||
import os
|
||||
import sys
|
||||
from enum import Enum
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
from typing import Any, TypeVar, Union, get_args, get_origin
|
||||
|
||||
import httpx
|
||||
import yaml
|
||||
from fastapi import Response as FastAPIResponse
|
||||
from llama_stack_client import (
|
||||
NOT_GIVEN,
|
||||
APIResponse,
|
||||
AsyncAPIResponse,
|
||||
AsyncLlamaStackClient,
|
||||
AsyncStream,
|
||||
LlamaStackClient,
|
||||
)
|
||||
from pydantic import BaseModel, TypeAdapter
|
||||
from rich.console import Console
|
||||
from termcolor import cprint
|
||||
|
||||
from llama_stack.core.build import print_pip_install_help
|
||||
from llama_stack.core.configure import parse_and_maybe_upgrade_config
|
||||
from llama_stack.core.datatypes import BuildConfig, BuildProvider, DistributionSpec
|
||||
from llama_stack.core.request_headers import (
|
||||
PROVIDER_DATA_VAR,
|
||||
request_provider_data_context,
|
||||
)
|
||||
from llama_stack.core.resolver import ProviderRegistry
|
||||
from llama_stack.core.server.routes import RouteImpls, find_matching_route, initialize_route_impls
|
||||
from llama_stack.core.stack import (
|
||||
Stack,
|
||||
get_stack_run_config_from_distro,
|
||||
replace_env_vars,
|
||||
)
|
||||
from llama_stack.core.telemetry import Telemetry
|
||||
from llama_stack.core.telemetry.tracing import CURRENT_TRACE_CONTEXT, end_trace, setup_logger, start_trace
|
||||
from llama_stack.core.utils.config import redact_sensitive_fields
|
||||
from llama_stack.core.utils.context import preserve_contexts_async_generator
|
||||
from llama_stack.core.utils.exec import in_notebook
|
||||
from llama_stack.log import get_logger, setup_logging
|
||||
from llama_stack.strong_typing.inspection import is_unwrapped_body_param
|
||||
|
||||
logger = get_logger(name=__name__, category="core")
|
||||
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
def convert_pydantic_to_json_value(value: Any) -> Any:
|
||||
if isinstance(value, Enum):
|
||||
return value.value
|
||||
elif isinstance(value, list):
|
||||
return [convert_pydantic_to_json_value(item) for item in value]
|
||||
elif isinstance(value, dict):
|
||||
return {k: convert_pydantic_to_json_value(v) for k, v in value.items()}
|
||||
elif isinstance(value, BaseModel):
|
||||
return json.loads(value.model_dump_json())
|
||||
else:
|
||||
return value
|
||||
|
||||
|
||||
def convert_to_pydantic(annotation: Any, value: Any) -> Any:
|
||||
if isinstance(annotation, type) and annotation in {str, int, float, bool}:
|
||||
return value
|
||||
|
||||
origin = get_origin(annotation)
|
||||
|
||||
if origin is list:
|
||||
item_type = get_args(annotation)[0]
|
||||
try:
|
||||
return [convert_to_pydantic(item_type, item) for item in value]
|
||||
except Exception:
|
||||
logger.error(f"Error converting list {value} into {item_type}")
|
||||
return value
|
||||
|
||||
elif origin is dict:
|
||||
key_type, val_type = get_args(annotation)
|
||||
try:
|
||||
return {k: convert_to_pydantic(val_type, v) for k, v in value.items()}
|
||||
except Exception:
|
||||
logger.error(f"Error converting dict {value} into {val_type}")
|
||||
return value
|
||||
|
||||
try:
|
||||
# Handle Pydantic models and discriminated unions
|
||||
return TypeAdapter(annotation).validate_python(value)
|
||||
|
||||
except Exception as e:
|
||||
# TODO: this is a workaround for having Union[str, AgentToolGroup] in the API schema.
|
||||
# We should get rid of any non-discriminated unions in the API schema.
|
||||
if origin is Union:
|
||||
for union_type in get_args(annotation):
|
||||
try:
|
||||
return convert_to_pydantic(union_type, value)
|
||||
except Exception:
|
||||
continue
|
||||
logger.warning(
|
||||
f"Warning: direct client failed to convert parameter {value} into {annotation}: {e}",
|
||||
)
|
||||
raise ValueError(f"Failed to convert parameter {value} into {annotation}: {e}") from e
|
||||
|
||||
|
||||
class LibraryClientUploadFile:
|
||||
"""LibraryClient UploadFile object that mimics FastAPI's UploadFile interface."""
|
||||
|
||||
def __init__(self, filename: str, content: bytes):
|
||||
self.filename = filename
|
||||
self.content = content
|
||||
self.content_type = "application/octet-stream"
|
||||
|
||||
async def read(self) -> bytes:
|
||||
return self.content
|
||||
|
||||
|
||||
class LibraryClientHttpxResponse:
|
||||
"""LibraryClient httpx Response object for FastAPI Response conversion."""
|
||||
|
||||
def __init__(self, response):
|
||||
self.content = response.body if isinstance(response.body, bytes) else response.body.encode()
|
||||
self.status_code = response.status_code
|
||||
self.headers = response.headers
|
||||
|
||||
|
||||
class LlamaStackAsLibraryClient(LlamaStackClient):
|
||||
def __init__(
|
||||
self,
|
||||
config_path_or_distro_name: str,
|
||||
skip_logger_removal: bool = False,
|
||||
custom_provider_registry: ProviderRegistry | None = None,
|
||||
provider_data: dict[str, Any] | None = None,
|
||||
):
|
||||
super().__init__()
|
||||
self.async_client = AsyncLlamaStackAsLibraryClient(
|
||||
config_path_or_distro_name, custom_provider_registry, provider_data, skip_logger_removal
|
||||
)
|
||||
self.provider_data = provider_data
|
||||
|
||||
self.loop = asyncio.new_event_loop()
|
||||
|
||||
# use a new event loop to avoid interfering with the main event loop
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
try:
|
||||
loop.run_until_complete(self.async_client.initialize())
|
||||
finally:
|
||||
asyncio.set_event_loop(None)
|
||||
|
||||
def initialize(self):
|
||||
"""
|
||||
Deprecated method for backward compatibility.
|
||||
"""
|
||||
pass
|
||||
|
||||
def request(self, *args, **kwargs):
|
||||
loop = self.loop
|
||||
asyncio.set_event_loop(loop)
|
||||
|
||||
if kwargs.get("stream"):
|
||||
|
||||
def sync_generator():
|
||||
try:
|
||||
async_stream = loop.run_until_complete(self.async_client.request(*args, **kwargs))
|
||||
while True:
|
||||
chunk = loop.run_until_complete(async_stream.__anext__())
|
||||
yield chunk
|
||||
except StopAsyncIteration:
|
||||
pass
|
||||
finally:
|
||||
pending = asyncio.all_tasks(loop)
|
||||
if pending:
|
||||
loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True))
|
||||
|
||||
return sync_generator()
|
||||
else:
|
||||
try:
|
||||
result = loop.run_until_complete(self.async_client.request(*args, **kwargs))
|
||||
finally:
|
||||
pending = asyncio.all_tasks(loop)
|
||||
if pending:
|
||||
loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True))
|
||||
return result
|
||||
|
||||
|
||||
class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
|
||||
def __init__(
|
||||
self,
|
||||
config_path_or_distro_name: str,
|
||||
custom_provider_registry: ProviderRegistry | None = None,
|
||||
provider_data: dict[str, Any] | None = None,
|
||||
skip_logger_removal: bool = False,
|
||||
):
|
||||
super().__init__()
|
||||
# Initialize logging from environment variables first
|
||||
setup_logging()
|
||||
|
||||
# when using the library client, we should not log to console since many
|
||||
# of our logs are intended for server-side usage
|
||||
if sinks_from_env := os.environ.get("TELEMETRY_SINKS", None):
|
||||
current_sinks = sinks_from_env.strip().lower().split(",")
|
||||
os.environ["TELEMETRY_SINKS"] = ",".join(sink for sink in current_sinks if sink != "console")
|
||||
|
||||
if in_notebook():
|
||||
import nest_asyncio
|
||||
|
||||
nest_asyncio.apply()
|
||||
if not skip_logger_removal:
|
||||
self._remove_root_logger_handlers()
|
||||
|
||||
if config_path_or_distro_name.endswith(".yaml"):
|
||||
config_path = Path(config_path_or_distro_name)
|
||||
if not config_path.exists():
|
||||
raise ValueError(f"Config file {config_path} does not exist")
|
||||
config_dict = replace_env_vars(yaml.safe_load(config_path.read_text()))
|
||||
config = parse_and_maybe_upgrade_config(config_dict)
|
||||
else:
|
||||
# distribution
|
||||
config = get_stack_run_config_from_distro(config_path_or_distro_name)
|
||||
|
||||
self.config_path_or_distro_name = config_path_or_distro_name
|
||||
self.config = config
|
||||
self.custom_provider_registry = custom_provider_registry
|
||||
self.provider_data = provider_data
|
||||
self.route_impls: RouteImpls | None = None # Initialize to None to prevent AttributeError
|
||||
|
||||
def _remove_root_logger_handlers(self):
|
||||
"""
|
||||
Remove all handlers from the root logger. Needed to avoid polluting the console with logs.
|
||||
"""
|
||||
root_logger = logging.getLogger()
|
||||
|
||||
for handler in root_logger.handlers[:]:
|
||||
root_logger.removeHandler(handler)
|
||||
logger.info(f"Removed handler {handler.__class__.__name__} from root logger")
|
||||
|
||||
async def initialize(self) -> bool:
|
||||
"""
|
||||
Initialize the async client.
|
||||
|
||||
Returns:
|
||||
bool: True if initialization was successful
|
||||
"""
|
||||
|
||||
try:
|
||||
self.route_impls = None
|
||||
|
||||
stack = Stack(self.config, self.custom_provider_registry)
|
||||
await stack.initialize()
|
||||
self.impls = stack.impls
|
||||
except ModuleNotFoundError as _e:
|
||||
cprint(_e.msg, color="red", file=sys.stderr)
|
||||
cprint(
|
||||
"Using llama-stack as a library requires installing dependencies depending on the distribution (providers) you choose.\n",
|
||||
color="yellow",
|
||||
file=sys.stderr,
|
||||
)
|
||||
if self.config_path_or_distro_name.endswith(".yaml"):
|
||||
providers: dict[str, list[BuildProvider]] = {}
|
||||
for api, run_providers in self.config.providers.items():
|
||||
for provider in run_providers:
|
||||
providers.setdefault(api, []).append(
|
||||
BuildProvider(provider_type=provider.provider_type, module=provider.module)
|
||||
)
|
||||
providers = dict(providers)
|
||||
build_config = BuildConfig(
|
||||
distribution_spec=DistributionSpec(
|
||||
providers=providers,
|
||||
),
|
||||
external_providers_dir=self.config.external_providers_dir,
|
||||
)
|
||||
print_pip_install_help(build_config)
|
||||
else:
|
||||
prefix = "!" if in_notebook() else ""
|
||||
cprint(
|
||||
f"Please run:\n\n{prefix}llama stack list-deps {self.config_path_or_distro_name} | xargs -L1 uv pip install\n\n",
|
||||
"yellow",
|
||||
file=sys.stderr,
|
||||
)
|
||||
cprint(
|
||||
"Please check your internet connection and try again.",
|
||||
"red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
raise _e
|
||||
|
||||
assert self.impls is not None
|
||||
if self.config.telemetry.enabled:
|
||||
setup_logger(Telemetry())
|
||||
|
||||
if not os.environ.get("PYTEST_CURRENT_TEST"):
|
||||
console = Console()
|
||||
console.print(f"Using config [blue]{self.config_path_or_distro_name}[/blue]:")
|
||||
safe_config = redact_sensitive_fields(self.config.model_dump())
|
||||
console.print(yaml.dump(safe_config, indent=2))
|
||||
|
||||
self.route_impls = initialize_route_impls(self.impls)
|
||||
return True
    async def request(
        self,
        cast_to: Any,
        options: Any,
        *,
        stream=False,
        stream_cls=None,
    ):
        if self.route_impls is None:
            raise ValueError("Client not initialized. Please call initialize() first.")

        # Create headers with provider data if available
        headers = options.headers or {}
        if self.provider_data:
            keys = ["X-LlamaStack-Provider-Data", "x-llamastack-provider-data"]
            if all(key not in headers for key in keys):
                headers["X-LlamaStack-Provider-Data"] = json.dumps(self.provider_data)

        # Use context manager for provider data
        with request_provider_data_context(headers):
            if stream:
                response = await self._call_streaming(
                    cast_to=cast_to,
                    options=options,
                    stream_cls=stream_cls,
                )
            else:
                response = await self._call_non_streaming(
                    cast_to=cast_to,
                    options=options,
                )
        return response

    def _handle_file_uploads(self, options: Any, body: dict) -> tuple[dict, list[str]]:
        """Handle file uploads from OpenAI client and add them to the request body."""
        if not (hasattr(options, "files") and options.files):
            return body, []

        if not isinstance(options.files, list):
            return body, []

        field_names = []
        for file_tuple in options.files:
            if not (isinstance(file_tuple, tuple) and len(file_tuple) >= 2):
                continue

            field_name = file_tuple[0]
            file_object = file_tuple[1]

            if isinstance(file_object, BytesIO):
                file_object.seek(0)
                file_content = file_object.read()
                filename = getattr(file_object, "name", "uploaded_file")
                field_names.append(field_name)
                body[field_name] = LibraryClientUploadFile(filename, file_content)

        return body, field_names
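The method above expects (field_name, file_object) tuples with BytesIO payloads; a small sketch of that shape (options.files is normally assembled by the OpenAI SDK rather than by hand):

from io import BytesIO

payload = BytesIO(b"col_a,col_b\n1,2\n")
payload.name = "table.csv"  # picked up via getattr(file_object, "name", "uploaded_file")

# a list of (field_name, file_object) tuples; the field name ("file" here)
# becomes a key in the converted request body
files = [("file", payload)]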
    async def _call_non_streaming(
        self,
        *,
        cast_to: Any,
        options: Any,
    ):
        assert self.route_impls is not None  # Should be guaranteed by request() method, assertion for mypy
        path = options.url
        body = options.params or {}
        body |= options.json_data or {}

        # Merge extra_json parameters (extra_body from SDK is converted to extra_json)
        if hasattr(options, "extra_json") and options.extra_json:
            body |= options.extra_json

        matched_func, path_params, route_path, webmethod = find_matching_route(options.method, path, self.route_impls)
        body |= path_params

        body, field_names = self._handle_file_uploads(options, body)

        body = self._convert_body(matched_func, body, exclude_params=set(field_names))

        trace_path = webmethod.descriptive_name or route_path
        await start_trace(trace_path, {"__location__": "library_client"})
        try:
            result = await matched_func(**body)
        finally:
            await end_trace()

        # Handle FastAPI Response objects (e.g., from file content retrieval)
        if isinstance(result, FastAPIResponse):
            return LibraryClientHttpxResponse(result)

        json_content = json.dumps(convert_pydantic_to_json_value(result))

        filtered_body = {k: v for k, v in body.items() if not isinstance(v, LibraryClientUploadFile)}

        status_code = httpx.codes.OK

        if options.method.upper() == "DELETE" and result is None:
            status_code = httpx.codes.NO_CONTENT

        if status_code == httpx.codes.NO_CONTENT:
            json_content = ""

        mock_response = httpx.Response(
            status_code=status_code,
            content=json_content.encode("utf-8"),
            headers={
                "Content-Type": "application/json",
            },
            request=httpx.Request(
                method=options.method,
                url=options.url,
                params=options.params,
                headers=options.headers or {},
                json=convert_pydantic_to_json_value(filtered_body),
            ),
        )
        response = APIResponse(
            raw=mock_response,
            client=self,
            cast_to=cast_to,
            options=options,
            stream=False,
            stream_cls=None,
        )
        return response.parse()

    async def _call_streaming(
        self,
        *,
        cast_to: Any,
        options: Any,
        stream_cls: Any,
    ):
        assert self.route_impls is not None  # Should be guaranteed by request() method, assertion for mypy
        path = options.url
        body = options.params or {}
        body |= options.json_data or {}
        func, path_params, route_path, webmethod = find_matching_route(options.method, path, self.route_impls)
        body |= path_params

        # Prepare body for the function call (handles both Pydantic and traditional params)
        body = self._convert_body(func, body)

        trace_path = webmethod.descriptive_name or route_path
        await start_trace(trace_path, {"__location__": "library_client"})

        async def gen():
            try:
                async for chunk in await func(**body):
                    data = json.dumps(convert_pydantic_to_json_value(chunk))
                    sse_event = f"data: {data}\n\n"
                    yield sse_event.encode("utf-8")
            finally:
                await end_trace()

        wrapped_gen = preserve_contexts_async_generator(gen(), [CURRENT_TRACE_CONTEXT, PROVIDER_DATA_VAR])

        mock_response = httpx.Response(
            status_code=httpx.codes.OK,
            content=wrapped_gen,
            headers={
                "Content-Type": "application/json",
            },
            request=httpx.Request(
                method=options.method,
                url=options.url,
                params=options.params,
                headers=options.headers or {},
                json=convert_pydantic_to_json_value(body),
            ),
        )

        # we use asynchronous impl always internally and channel all requests to AsyncLlamaStackClient
        # however, the top-level caller may be a SyncAPIClient -- so its stream_cls might be a Stream (SyncStream)
        # so we need to convert it to AsyncStream
        # mypy can't track runtime variables inside the [...] of a generic, so ignore that check
        args = get_args(stream_cls)
        stream_cls = AsyncStream[args[0]]  # type: ignore[valid-type]
        response = AsyncAPIResponse(
            raw=mock_response,
            client=self,
            cast_to=cast_to,
            options=options,
            stream=True,
            stream_cls=stream_cls,
        )
        return await response.parse()
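Streamed calls are re-framed as SSE ("data: ...\n\n") and handed back through an AsyncStream. A rough sketch of what a caller sees (illustrative only; assumes the OpenAI-compatible chat completions resource is exposed on the client and uses a placeholder model id):

async def stream_chat(client) -> None:
    stream = await client.chat.completions.create(
        model="example-model-id",  # placeholder identifier
        messages=[{"role": "user", "content": "hello"}],
        stream=True,  # routes through _call_streaming above
    )
    async for chunk in stream:  # each chunk was framed internally as "data: {...}\n\n"
        print(chunk)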
    def _convert_body(self, func: Any, body: dict | None = None, exclude_params: set[str] | None = None) -> dict:
        body = body or {}
        exclude_params = exclude_params or set()
        sig = inspect.signature(func)
        params_list = [p for p in sig.parameters.values() if p.name != "self"]

        # Flatten if there's a single unwrapped body parameter (BaseModel or Annotated[BaseModel, Body(embed=False)])
        if len(params_list) == 1:
            param = params_list[0]
            param_type = param.annotation
            if is_unwrapped_body_param(param_type):
                base_type = get_args(param_type)[0]
                return {param.name: base_type(**body)}

        # Strip NOT_GIVENs to use the defaults in signature
        body = {k: v for k, v in body.items() if v is not NOT_GIVEN}

        # Check if there's an unwrapped body parameter among multiple parameters
        # (e.g., path param + body param like: vector_store_id: str, params: Annotated[Model, Body(...)])
        unwrapped_body_param = None
        for param in params_list:
            if is_unwrapped_body_param(param.annotation):
                unwrapped_body_param = param
                break

        # Convert parameters to Pydantic models where needed
        converted_body = {}
        for param_name, param in sig.parameters.items():
            if param_name in body:
                value = body.get(param_name)
                if param_name in exclude_params:
                    converted_body[param_name] = value
                else:
                    converted_body[param_name] = convert_to_pydantic(param.annotation, value)

        # handle unwrapped body parameter after processing all named parameters
        if unwrapped_body_param:
            base_type = get_args(unwrapped_body_param.annotation)[0]
            # extract only keys not already used by other params
            remaining_keys = {k: v for k, v in body.items() if k not in converted_body}
            converted_body[unwrapped_body_param.name] = base_type(**remaining_keys)

        return converted_body
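A toy illustration of the single-parameter flattening performed above (sketch only: CreateWidgetRequest and create_widget are hypothetical, and the real code relies on is_unwrapped_body_param and convert_to_pydantic rather than this simplified Annotated inspection):

import inspect
from typing import Annotated, get_args

from pydantic import BaseModel


class CreateWidgetRequest(BaseModel):  # hypothetical request model
    name: str
    size: int = 1


def create_widget(params: Annotated[CreateWidgetRequest, "Body(embed=False)"]) -> None: ...


body = {"name": "demo", "size": 3}
param = next(iter(inspect.signature(create_widget).parameters.values()))
model_cls = get_args(param.annotation)[0]
# the flat request body is collapsed into the single body parameter
print({param.name: model_cls(**body)})  # {'params': CreateWidgetRequest(name='demo', size=3)}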
5
src/llama_stack/core/prompts/__init__.py
Normal file

@@ -0,0 +1,5 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

232
src/llama_stack/core/prompts/prompts.py
Normal file

@@ -0,0 +1,232 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import json
from typing import Any

from pydantic import BaseModel

from llama_stack.apis.prompts import ListPromptsResponse, Prompt, Prompts
from llama_stack.core.datatypes import StackRunConfig
from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl


class PromptServiceConfig(BaseModel):
    """Configuration for the built-in prompt service.

    :param run_config: Stack run configuration containing distribution info
    """

    run_config: StackRunConfig


async def get_provider_impl(config: PromptServiceConfig, deps: dict[Any, Any]):
    """Get the prompt service implementation."""
    impl = PromptServiceImpl(config, deps)
    await impl.initialize()
    return impl


class PromptServiceImpl(Prompts):
    """Built-in prompt service implementation using KVStore."""

    def __init__(self, config: PromptServiceConfig, deps: dict[Any, Any]):
        self.config = config
        self.deps = deps
        self.kvstore: KVStore

    async def initialize(self) -> None:
        # Use prompts store reference from run config
        prompts_ref = self.config.run_config.storage.stores.prompts
        if not prompts_ref:
            raise ValueError("storage.stores.prompts must be configured in run config")
        self.kvstore = await kvstore_impl(prompts_ref)

    def _get_default_key(self, prompt_id: str) -> str:
        """Get the KVStore key that stores the default version number."""
        return f"prompts:v1:{prompt_id}:default"

    async def _get_prompt_key(self, prompt_id: str, version: int | None = None) -> str:
        """Get the KVStore key for prompt data, returning default version if applicable."""
        if version:
            return self._get_version_key(prompt_id, str(version))

        default_key = self._get_default_key(prompt_id)
        resolved_version = await self.kvstore.get(default_key)
        if resolved_version is None:
            raise ValueError(f"Prompt {prompt_id}:default not found")
        return self._get_version_key(prompt_id, resolved_version)

    def _get_version_key(self, prompt_id: str, version: str) -> str:
        """Get the KVStore key for a specific prompt version."""
        return f"prompts:v1:{prompt_id}:{version}"

    def _get_list_key_prefix(self) -> str:
        """Get the key prefix for listing prompts."""
        return "prompts:v1:"

    def _serialize_prompt(self, prompt: Prompt) -> str:
        """Serialize a prompt to JSON string for storage."""
        return json.dumps(
            {
                "prompt_id": prompt.prompt_id,
                "prompt": prompt.prompt,
                "version": prompt.version,
                "variables": prompt.variables or [],
                "is_default": prompt.is_default,
            }
        )

    def _deserialize_prompt(self, data: str) -> Prompt:
        """Deserialize a prompt from JSON string."""
        obj = json.loads(data)
        return Prompt(
            prompt_id=obj["prompt_id"],
            prompt=obj["prompt"],
            version=obj["version"],
            variables=obj.get("variables", []),
            is_default=obj.get("is_default", False),
        )

    async def list_prompts(self) -> ListPromptsResponse:
        """List all prompts (default versions only)."""
        prefix = self._get_list_key_prefix()
        keys = await self.kvstore.keys_in_range(prefix, prefix + "\xff")

        prompts = []
        for key in keys:
            if key.endswith(":default"):
                try:
                    default_version = await self.kvstore.get(key)
                    if default_version:
                        prompt_id = key.replace(prefix, "").replace(":default", "")
                        version_key = self._get_version_key(prompt_id, default_version)
                        data = await self.kvstore.get(version_key)
                        if data:
                            prompt = self._deserialize_prompt(data)
                            prompts.append(prompt)
                except (json.JSONDecodeError, KeyError):
                    continue

        prompts.sort(key=lambda p: p.prompt_id or "", reverse=True)
        return ListPromptsResponse(data=prompts)

    async def get_prompt(self, prompt_id: str, version: int | None = None) -> Prompt:
        """Get a prompt by its identifier and optional version."""
        key = await self._get_prompt_key(prompt_id, version)
        data = await self.kvstore.get(key)
        if data is None:
            raise ValueError(f"Prompt {prompt_id}:{version if version else 'default'} not found")
        return self._deserialize_prompt(data)

    async def create_prompt(
        self,
        prompt: str,
        variables: list[str] | None = None,
    ) -> Prompt:
        """Create a new prompt."""
        if variables is None:
            variables = []

        prompt_obj = Prompt(
            prompt_id=Prompt.generate_prompt_id(),
            prompt=prompt,
            version=1,
            variables=variables,
        )

        version_key = self._get_version_key(prompt_obj.prompt_id, str(prompt_obj.version))
        data = self._serialize_prompt(prompt_obj)
        await self.kvstore.set(version_key, data)

        default_key = self._get_default_key(prompt_obj.prompt_id)
        await self.kvstore.set(default_key, str(prompt_obj.version))

        return prompt_obj

    async def update_prompt(
        self,
        prompt_id: str,
        prompt: str,
        version: int,
        variables: list[str] | None = None,
        set_as_default: bool = True,
    ) -> Prompt:
        """Update an existing prompt (increments version)."""
        if version < 1:
            raise ValueError("Version must be >= 1")
        if variables is None:
            variables = []

        prompt_versions = await self.list_prompt_versions(prompt_id)
        latest_prompt = max(prompt_versions.data, key=lambda x: int(x.version))

        if version and latest_prompt.version != version:
            raise ValueError(
                f"'{version}' is not the latest prompt version for prompt_id='{prompt_id}'. Use the latest version '{latest_prompt.version}' in request."
            )

        current_version = latest_prompt.version if version is None else version
        new_version = current_version + 1

        updated_prompt = Prompt(prompt_id=prompt_id, prompt=prompt, version=new_version, variables=variables)

        version_key = self._get_version_key(prompt_id, str(new_version))
        data = self._serialize_prompt(updated_prompt)
        await self.kvstore.set(version_key, data)

        if set_as_default:
            await self.set_default_version(prompt_id, new_version)

        return updated_prompt

    async def delete_prompt(self, prompt_id: str) -> None:
        """Delete a prompt and all its versions."""
        await self.get_prompt(prompt_id)

        prefix = f"prompts:v1:{prompt_id}:"
        keys = await self.kvstore.keys_in_range(prefix, prefix + "\xff")

        for key in keys:
            await self.kvstore.delete(key)

    async def list_prompt_versions(self, prompt_id: str) -> ListPromptsResponse:
        """List all versions of a specific prompt."""
        prefix = f"prompts:v1:{prompt_id}:"
        keys = await self.kvstore.keys_in_range(prefix, prefix + "\xff")

        default_version = None
        prompts = []

        for key in keys:
            data = await self.kvstore.get(key)
            if key.endswith(":default"):
                default_version = data
            else:
                if data:
                    prompt_obj = self._deserialize_prompt(data)
                    prompts.append(prompt_obj)

        if not prompts:
            raise ValueError(f"Prompt {prompt_id} not found")

        for prompt in prompts:
            prompt.is_default = str(prompt.version) == default_version

        prompts.sort(key=lambda x: x.version)
        return ListPromptsResponse(data=prompts)
    async def set_default_version(self, prompt_id: str, version: int) -> Prompt:
        """Set which version of a prompt should be the default. If not set, the default is the latest version."""
        version_key = self._get_version_key(prompt_id, str(version))
        data = await self.kvstore.get(version_key)
        if data is None:
            raise ValueError(f"Prompt {prompt_id} version {version} not found")

        default_key = self._get_default_key(prompt_id)
        await self.kvstore.set(default_key, str(version))

        return self._deserialize_prompt(data)
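A sketch of the key layout this service writes and a rough driver for it (illustrative only; the prompt id and template text are made up, and svc stands for an already-initialized PromptServiceImpl):

# Key layout produced by the helpers above (prompt id is a made-up example):
#   prompts:v1:pmpt_1234:1        -> JSON for version 1
#   prompts:v1:pmpt_1234:2        -> JSON for version 2 (written by update_prompt)
#   prompts:v1:pmpt_1234:default  -> "2" (pointer to the default version)


async def demo(svc) -> None:
    created = await svc.create_prompt("Summarize {{ document }}", variables=["document"])
    updated = await svc.update_prompt(
        created.prompt_id,
        "Summarize {{ document }} briefly.",
        version=created.version,
        variables=["document"],
    )
    # with set_as_default=True (the default), get_prompt now resolves to the new version
    assert (await svc.get_prompt(created.prompt_id)).version == updated.version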
137
src/llama_stack/core/providers.py
Normal file

@@ -0,0 +1,137 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import asyncio
from typing import Any

from pydantic import BaseModel

from llama_stack.apis.providers import ListProvidersResponse, ProviderInfo, Providers
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import HealthResponse, HealthStatus

from .datatypes import StackRunConfig
from .utils.config import redact_sensitive_fields

logger = get_logger(name=__name__, category="core")


class ProviderImplConfig(BaseModel):
    run_config: StackRunConfig


async def get_provider_impl(config, deps):
    impl = ProviderImpl(config, deps)
    await impl.initialize()
    return impl


class ProviderImpl(Providers):
    def __init__(self, config, deps):
        self.config = config
        self.deps = deps

    async def initialize(self) -> None:
        pass

    async def shutdown(self) -> None:
        logger.debug("ProviderImpl.shutdown")
        pass

    async def list_providers(self) -> ListProvidersResponse:
        run_config = self.config.run_config
        safe_config = StackRunConfig(**redact_sensitive_fields(run_config.model_dump()))
        providers_health = await self.get_providers_health()
        ret = []
        for api, providers in safe_config.providers.items():
            for p in providers:
                # Skip providers that are not enabled
                if p.provider_id is None:
                    continue
                ret.append(
                    ProviderInfo(
                        api=api,
                        provider_id=p.provider_id,
                        provider_type=p.provider_type,
                        config=p.config,
                        health=providers_health.get(api, {}).get(
                            p.provider_id,
                            HealthResponse(
                                status=HealthStatus.NOT_IMPLEMENTED, message="Provider does not implement health check"
                            ),
                        ),
                    )
                )

        return ListProvidersResponse(data=ret)

    async def inspect_provider(self, provider_id: str) -> ProviderInfo:
        all_providers = await self.list_providers()
        for p in all_providers.data:
            if p.provider_id == provider_id:
                return p

        raise ValueError(f"Provider {provider_id} not found")

    async def get_providers_health(self) -> dict[str, dict[str, HealthResponse]]:
        """Get health status for all providers.

        Returns:
            Dict[str, Dict[str, HealthResponse]]: A dictionary mapping API names to provider health statuses.
                Each API maps to a dictionary of provider IDs to their health responses.
        """
        providers_health: dict[str, dict[str, HealthResponse]] = {}

        # The timeout has to be long enough to allow all the providers to be checked, especially in
        # the case of the inference router health check since it checks all registered inference
        # providers.
        # The timeout must not be equal to the one set by health method for a given implementation,
        # otherwise we will miss some providers.
        timeout = 3.0

        async def check_provider_health(impl: Any) -> tuple[str, HealthResponse] | None:
            # Skip special implementations (inspect/providers) that don't have provider specs
            if not hasattr(impl, "__provider_spec__"):
                return None
            api_name = impl.__provider_spec__.api.name
            if not hasattr(impl, "health"):
                return (
                    api_name,
                    HealthResponse(
                        status=HealthStatus.NOT_IMPLEMENTED, message="Provider does not implement health check"
                    ),
                )

            try:
                health = await asyncio.wait_for(impl.health(), timeout=timeout)
                return api_name, health
            except TimeoutError:
                return (
                    api_name,
                    HealthResponse(
                        status=HealthStatus.ERROR, message=f"Health check timed out after {timeout} seconds"
                    ),
                )
            except Exception as e:
                return (
                    api_name,
                    HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}"),
                )

        # Create tasks for all providers
        tasks = [check_provider_health(impl) for impl in self.deps.values()]

        # Wait for all health checks to complete
        results = await asyncio.gather(*tasks)

        # Organize results by API and provider ID
        for result in results:
            if result is None:  # Skip special implementations
                continue
            api_name, health_response = result
            providers_health[api_name] = health_response

        return providers_health
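A rough sketch of querying this API once a stack is up (illustrative only; it assumes the generated client exposes a providers resource that mirrors list_providers above):

async def show_provider_health(client) -> None:
    providers = await client.providers.list()  # assumed SDK resource; mirrors list_providers
    for p in providers:
        print(p.api, p.provider_id, p.health)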
115
src/llama_stack/core/request_headers.py
Normal file

@@ -0,0 +1,115 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import contextvars
import json
from contextlib import AbstractContextManager
from typing import Any

from llama_stack.core.datatypes import User
from llama_stack.log import get_logger

from .utils.dynamic import instantiate_class_type

log = get_logger(name=__name__, category="core")

# Context variable for request provider data and auth attributes
PROVIDER_DATA_VAR = contextvars.ContextVar("provider_data", default=None)


class RequestProviderDataContext(AbstractContextManager):
    """Context manager for request provider data"""

    def __init__(self, provider_data: dict[str, Any] | None = None, user: User | None = None):
        self.provider_data = provider_data or {}
        if user:
            self.provider_data["__authenticated_user"] = user

        self.token = None

    def __enter__(self):
        # Save the current value and set the new one
        self.token = PROVIDER_DATA_VAR.set(self.provider_data)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Restore the previous value
        if self.token is not None:
            PROVIDER_DATA_VAR.reset(self.token)


class NeedsRequestProviderData:
    def get_request_provider_data(self) -> Any:
        spec = self.__provider_spec__
        if not spec:
            raise ValueError(f"Provider spec not set on {self.__class__}")

        provider_type = spec.provider_type
        validator_class = spec.provider_data_validator
        if not validator_class:
            raise ValueError(f"Provider {provider_type} does not have a validator")

        val = PROVIDER_DATA_VAR.get()
        if not val:
            return None

        validator = instantiate_class_type(validator_class)
        try:
            provider_data = validator(**val)
            return provider_data
        except Exception as e:
            log.error(f"Error parsing provider data: {e}")
            return None


def parse_request_provider_data(headers: dict[str, str]) -> dict[str, Any] | None:
    """Parse provider data from request headers"""
    keys = [
        "X-LlamaStack-Provider-Data",
        "x-llamastack-provider-data",
    ]
    val = None
    for key in keys:
        val = headers.get(key, None)
        if val:
            break

    if not val:
        return None

    try:
        return json.loads(val)
    except json.JSONDecodeError:
        log.error("Provider data not encoded as a JSON object!")
        return None


def request_provider_data_context(
    headers: dict[str, str], auth_attributes: dict[str, list[str]] | None = None
) -> AbstractContextManager:
    """Context manager that sets request provider data from headers and auth attributes for the duration of the context"""
    provider_data = parse_request_provider_data(headers)
    return RequestProviderDataContext(provider_data, auth_attributes)


def get_authenticated_user() -> User | None:
    """Helper to retrieve auth attributes from the provider data context"""
    provider_data = PROVIDER_DATA_VAR.get()
    if not provider_data:
        return None
    return provider_data.get("__authenticated_user")


def user_from_scope(scope: dict) -> User | None:
    """Create a User object from ASGI scope data (set by authentication middleware)"""
    user_attributes = scope.get("user_attributes", {})
    principal = scope.get("principal", "")

    # auth not enabled
    if not principal and not user_attributes:
        return None

    return User(principal=principal, attributes=user_attributes)
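A short sketch of how per-request provider data flows through this module (illustrative only; the header payload and key name are made-up examples, and the snippet relies on the functions and imports defined above):

headers = {"X-LlamaStack-Provider-Data": json.dumps({"example_api_key": "sk-placeholder"})}

with request_provider_data_context(headers):
    # inside the context, a provider mixing in NeedsRequestProviderData can call
    # self.get_request_provider_data() and receive the validated payload
    print(PROVIDER_DATA_VAR.get())  # {'example_api_key': 'sk-placeholder'}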
Some files were not shown because too many files have changed in this diff.