Merge branch 'main' into vllm_health_check

Commit c18b585d32, authored by Sumit Jaiswal on 2025-06-05 18:09:36 +05:30 and committed via GitHub.
143 changed files with 9210 additions and 5347 deletions

View file

@@ -37,6 +37,7 @@ from .openai_responses import (
OpenAIResponseInputTool,
OpenAIResponseObject,
OpenAIResponseObjectStream,
OpenAIResponseText,
)
# TODO: use enum.StrEnum when we drop support for python 3.10
@@ -603,7 +604,9 @@ class Agents(Protocol):
store: bool | None = True,
stream: bool | None = False,
temperature: float | None = None,
text: OpenAIResponseText | None = None,
tools: list[OpenAIResponseInputTool] | None = None,
max_infer_iters: int | None = 10, # this is an extension to the OpenAI API
) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]:
"""Create a new OpenAI response.

View file

@@ -7,6 +7,7 @@
from typing import Annotated, Any, Literal
from pydantic import BaseModel, Field
from typing_extensions import TypedDict
from llama_stack.schema_utils import json_schema_type, register_schema
@@ -126,6 +127,32 @@ OpenAIResponseOutput = Annotated[
register_schema(OpenAIResponseOutput, name="OpenAIResponseOutput")
# This has to be a TypedDict because we need a "schema" field and our strong
# typing code in the schema generator doesn't support Pydantic aliases. That also
# means we can't use a discriminator field here, because TypedDicts don't support
# default values which the strong typing code requires for discriminators.
class OpenAIResponseTextFormat(TypedDict, total=False):
"""Configuration for Responses API text format.
:param type: Must be "text", "json_schema", or "json_object" to identify the format type
:param name: The name of the response format. Only used for json_schema.
:param schema: The JSON schema the response should conform to. In a Python SDK, this is often a `pydantic` model. Only used for json_schema.
:param description: (Optional) A description of the response format. Only used for json_schema.
:param strict: (Optional) Whether to strictly enforce the JSON schema. If true, the response must match the schema exactly. Only used for json_schema.
"""
type: Literal["text"] | Literal["json_schema"] | Literal["json_object"]
name: str | None
schema: dict[str, Any] | None
description: str | None
strict: bool | None
@json_schema_type
class OpenAIResponseText(BaseModel):
format: OpenAIResponseTextFormat | None = None
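For illustration, a structured-output configuration built from these two models might look like the following sketch (the schema body and the "weather_report" name are assumptions, not from this diff):

text = OpenAIResponseText(
    format=OpenAIResponseTextFormat(
        type="json_schema",
        name="weather_report",  # hypothetical schema name
        schema={"type": "object", "properties": {"temperature": {"type": "number"}}},
        strict=True,
    )
)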
@json_schema_type
class OpenAIResponseObject(BaseModel):
created_at: int
@@ -138,6 +165,9 @@ class OpenAIResponseObject(BaseModel):
previous_response_id: str | None = None
status: str
temperature: float | None = None
# Default to text format to avoid breaking the loading of old responses
# before the field was added. New responses will have this set always.
text: OpenAIResponseText = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text"))
top_p: float | None = None
truncation: str | None = None
user: str | None = None
@@ -149,6 +179,30 @@ class OpenAIResponseObjectStreamResponseCreated(BaseModel):
type: Literal["response.created"] = "response.created"
@json_schema_type
class OpenAIResponseObjectStreamResponseCompleted(BaseModel):
response: OpenAIResponseObject
type: Literal["response.completed"] = "response.completed"
@json_schema_type
class OpenAIResponseObjectStreamResponseOutputItemAdded(BaseModel):
response_id: str
item: OpenAIResponseOutput
output_index: int
sequence_number: int
type: Literal["response.output_item.added"] = "response.output_item.added"
@json_schema_type
class OpenAIResponseObjectStreamResponseOutputItemDone(BaseModel):
response_id: str
item: OpenAIResponseOutput
output_index: int
sequence_number: int
type: Literal["response.output_item.done"] = "response.output_item.done"
@json_schema_type
class OpenAIResponseObjectStreamResponseOutputTextDelta(BaseModel):
content_index: int
@@ -160,14 +214,132 @@ class OpenAIResponseObjectStreamResponseOutputTextDelta(BaseModel):
@json_schema_type
class OpenAIResponseObjectStreamResponseCompleted(BaseModel):
response: OpenAIResponseObject
type: Literal["response.completed"] = "response.completed"
class OpenAIResponseObjectStreamResponseOutputTextDone(BaseModel):
content_index: int
text: str # final text of the output item
item_id: str
output_index: int
sequence_number: int
type: Literal["response.output_text.done"] = "response.output_text.done"
@json_schema_type
class OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta(BaseModel):
delta: str
item_id: str
output_index: int
sequence_number: int
type: Literal["response.function_call_arguments.delta"] = "response.function_call_arguments.delta"
@json_schema_type
class OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone(BaseModel):
arguments: str # final arguments of the function call
item_id: str
output_index: int
sequence_number: int
type: Literal["response.function_call_arguments.done"] = "response.function_call_arguments.done"
@json_schema_type
class OpenAIResponseObjectStreamResponseWebSearchCallInProgress(BaseModel):
item_id: str
output_index: int
sequence_number: int
type: Literal["response.web_search_call.in_progress"] = "response.web_search_call.in_progress"
@json_schema_type
class OpenAIResponseObjectStreamResponseWebSearchCallSearching(BaseModel):
item_id: str
output_index: int
sequence_number: int
type: Literal["response.web_search_call.searching"] = "response.web_search_call.searching"
@json_schema_type
class OpenAIResponseObjectStreamResponseWebSearchCallCompleted(BaseModel):
item_id: str
output_index: int
sequence_number: int
type: Literal["response.web_search_call.completed"] = "response.web_search_call.completed"
@json_schema_type
class OpenAIResponseObjectStreamResponseMcpListToolsInProgress(BaseModel):
sequence_number: int
type: Literal["response.mcp_list_tools.in_progress"] = "response.mcp_list_tools.in_progress"
@json_schema_type
class OpenAIResponseObjectStreamResponseMcpListToolsFailed(BaseModel):
sequence_number: int
type: Literal["response.mcp_list_tools.failed"] = "response.mcp_list_tools.failed"
@json_schema_type
class OpenAIResponseObjectStreamResponseMcpListToolsCompleted(BaseModel):
sequence_number: int
type: Literal["response.mcp_list_tools.completed"] = "response.mcp_list_tools.completed"
@json_schema_type
class OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta(BaseModel):
delta: str
item_id: str
output_index: int
sequence_number: int
type: Literal["response.mcp_call.arguments.delta"] = "response.mcp_call.arguments.delta"
@json_schema_type
class OpenAIResponseObjectStreamResponseMcpCallArgumentsDone(BaseModel):
arguments: str # final arguments of the MCP call
item_id: str
output_index: int
sequence_number: int
type: Literal["response.mcp_call.arguments.done"] = "response.mcp_call.arguments.done"
@json_schema_type
class OpenAIResponseObjectStreamResponseMcpCallInProgress(BaseModel):
item_id: str
output_index: int
sequence_number: int
type: Literal["response.mcp_call.in_progress"] = "response.mcp_call.in_progress"
@json_schema_type
class OpenAIResponseObjectStreamResponseMcpCallFailed(BaseModel):
sequence_number: int
type: Literal["response.mcp_call.failed"] = "response.mcp_call.failed"
@json_schema_type
class OpenAIResponseObjectStreamResponseMcpCallCompleted(BaseModel):
sequence_number: int
type: Literal["response.mcp_call.completed"] = "response.mcp_call.completed"
OpenAIResponseObjectStream = Annotated[
OpenAIResponseObjectStreamResponseCreated
| OpenAIResponseObjectStreamResponseOutputItemAdded
| OpenAIResponseObjectStreamResponseOutputItemDone
| OpenAIResponseObjectStreamResponseOutputTextDelta
| OpenAIResponseObjectStreamResponseOutputTextDone
| OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta
| OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone
| OpenAIResponseObjectStreamResponseWebSearchCallInProgress
| OpenAIResponseObjectStreamResponseWebSearchCallSearching
| OpenAIResponseObjectStreamResponseWebSearchCallCompleted
| OpenAIResponseObjectStreamResponseMcpListToolsInProgress
| OpenAIResponseObjectStreamResponseMcpListToolsFailed
| OpenAIResponseObjectStreamResponseMcpListToolsCompleted
| OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta
| OpenAIResponseObjectStreamResponseMcpCallArgumentsDone
| OpenAIResponseObjectStreamResponseMcpCallInProgress
| OpenAIResponseObjectStreamResponseMcpCallFailed
| OpenAIResponseObjectStreamResponseMcpCallCompleted
| OpenAIResponseObjectStreamResponseCompleted,
Field(discriminator="type"),
]
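A consumer can dispatch on the `type` discriminator of this union. A minimal sketch (it assumes the delta event carries a `delta` string and that OpenAIResponseObject has an `id` field, neither of which is fully shown in these hunks):

from collections.abc import AsyncIterator

async def print_response_stream(stream: AsyncIterator[OpenAIResponseObjectStream]) -> None:
    async for event in stream:
        # "type" is the discriminator field of the Annotated union above
        if event.type == "response.output_text.delta":
            print(event.delta, end="", flush=True)   # `delta` field assumed
        elif event.type == "response.completed":
            print(f"\n[completed] {event.response.id}")  # `id` field assumed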

View file

@@ -4,179 +4,158 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Protocol, runtime_checkable
from enum import Enum
from typing import Annotated, Literal, Protocol, runtime_checkable
from fastapi import File, Form, Response, UploadFile
from pydantic import BaseModel
from llama_stack.apis.common.responses import Order
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
from llama_stack.schema_utils import json_schema_type, webmethod
@json_schema_type
class FileUploadResponse(BaseModel):
# OpenAI Files API Models
class OpenAIFilePurpose(str, Enum):
"""
Valid purpose values for OpenAI Files API.
"""
Response after initiating a file upload session.
:param id: ID of the upload session
:param url: Upload URL for the file or file parts
:param offset: Upload content offset
:param size: Upload content size
ASSISTANTS = "assistants"
# TODO: Add other purposes as needed
@json_schema_type
class OpenAIFileObject(BaseModel):
"""
OpenAI File object as defined in the OpenAI Files API.
:param object: The object type, which is always "file"
:param id: The file identifier, which can be referenced in the API endpoints
:param bytes: The size of the file, in bytes
:param created_at: The Unix timestamp (in seconds) for when the file was created
:param expires_at: The Unix timestamp (in seconds) for when the file expires
:param filename: The name of the file
:param purpose: The intended purpose of the file
"""
object: Literal["file"] = "file"
id: str
bytes: int
created_at: int
expires_at: int
filename: str
purpose: OpenAIFilePurpose
@json_schema_type
class ListOpenAIFileResponse(BaseModel):
"""
Response for listing files in OpenAI Files API.
:param data: List of file objects
:param object: The object type, which is always "list"
"""
data: list[OpenAIFileObject]
has_more: bool
first_id: str
last_id: str
object: Literal["list"] = "list"
@json_schema_type
class OpenAIFileDeleteResponse(BaseModel):
"""
Response for deleting a file in OpenAI Files API.
:param id: The file identifier that was deleted
:param object: The object type, which is always "file"
:param deleted: Whether the file was successfully deleted
"""
id: str
url: str
offset: int
size: int
@json_schema_type
class BucketResponse(BaseModel):
name: str
@json_schema_type
class ListBucketResponse(BaseModel):
"""
Response representing a list of file entries.
:param data: List of FileResponse entries
"""
data: list[BucketResponse]
@json_schema_type
class FileResponse(BaseModel):
"""
Response representing a file entry.
:param bucket: Bucket under which the file is stored (valid chars: a-zA-Z0-9_-)
:param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
:param mime_type: MIME type of the file
:param url: Upload URL for the file contents
:param bytes: Size of the file in bytes
:param created_at: Timestamp of when the file was created
"""
bucket: str
key: str
mime_type: str
url: str
bytes: int
created_at: int
@json_schema_type
class ListFileResponse(BaseModel):
"""
Response representing a list of file entries.
:param data: List of FileResponse entries
"""
data: list[FileResponse]
object: Literal["file"] = "file"
deleted: bool
@runtime_checkable
@trace_protocol
class Files(Protocol):
@webmethod(route="/files", method="POST")
async def create_upload_session(
# OpenAI Files API Endpoints
@webmethod(route="/openai/v1/files", method="POST")
async def openai_upload_file(
self,
bucket: str,
key: str,
mime_type: str,
size: int,
) -> FileUploadResponse:
file: Annotated[UploadFile, File()],
purpose: Annotated[OpenAIFilePurpose, Form()],
) -> OpenAIFileObject:
"""
Create a new upload session for a file identified by a bucket and key.
Upload a file that can be used across various endpoints.
:param bucket: Bucket under which the file is stored (valid chars: a-zA-Z0-9_-).
:param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.).
:param mime_type: MIME type of the file.
:param size: File size in bytes.
:returns: A FileUploadResponse.
The file upload should be a multipart form request with:
- file: The File object (not file name) to be uploaded.
- purpose: The intended purpose of the uploaded file.
:param file: The uploaded file object containing content and metadata (filename, content_type, etc.).
:param purpose: The intended purpose of the uploaded file (e.g., "assistants", "fine-tune").
:returns: An OpenAIFileObject representing the uploaded file.
"""
...
@webmethod(route="/files/session:{upload_id}", method="POST", raw_bytes_request_body=True)
async def upload_content_to_session(
@webmethod(route="/openai/v1/files", method="GET")
async def openai_list_files(
self,
upload_id: str,
) -> FileResponse | None:
after: str | None = None,
limit: int | None = 10000,
order: Order | None = Order.desc,
purpose: OpenAIFilePurpose | None = None,
) -> ListOpenAIFileResponse:
"""
Upload file content to an existing upload session.
On the server, the request body contains the raw bytes that are uploaded.
Returns a list of files that belong to the user's organization.
:param upload_id: ID of the upload session.
:returns: A FileResponse or None if the upload is not complete.
:param after: A cursor for use in pagination. `after` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include after=obj_foo in order to fetch the next page of the list.
:param limit: A limit on the number of objects to be returned. Limit can range between 1 and 10,000, and the default is 10,000.
:param order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for descending order.
:param purpose: Only return files with the given purpose.
:returns: A ListOpenAIFileResponse containing the list of files.
"""
...
@webmethod(route="/files/session:{upload_id}", method="GET")
async def get_upload_session_info(
@webmethod(route="/openai/v1/files/{file_id}", method="GET")
async def openai_retrieve_file(
self,
upload_id: str,
) -> FileUploadResponse:
file_id: str,
) -> OpenAIFileObject:
"""
Returns information about an existing upload session.
Returns information about a specific file.
:param upload_id: ID of the upload session.
:returns: A FileUploadResponse.
:param file_id: The ID of the file to use for this request.
:returns: An OpenAIFileObject containing file information.
"""
...
@webmethod(route="/files", method="GET")
async def list_all_buckets(
@webmethod(route="/openai/v1/files/{file_id}", method="DELETE")
async def openai_delete_file(
self,
bucket: str,
) -> ListBucketResponse:
file_id: str,
) -> OpenAIFileDeleteResponse:
"""
List all buckets.
Delete a file.
:param bucket: Bucket name (valid chars: a-zA-Z0-9_-).
:returns: A ListBucketResponse.
:param file_id: The ID of the file to use for this request.
:returns: An OpenAIFileDeleteResponse indicating successful deletion.
"""
...
@webmethod(route="/files/{bucket}", method="GET")
async def list_files_in_bucket(
@webmethod(route="/openai/v1/files/{file_id}/content", method="GET")
async def openai_retrieve_file_content(
self,
bucket: str,
) -> ListFileResponse:
file_id: str,
) -> Response:
"""
List all files in a bucket.
Returns the contents of the specified file.
:param bucket: Bucket name (valid chars: a-zA-Z0-9_-).
:returns: A ListFileResponse.
"""
...
@webmethod(route="/files/{bucket}/{key:path}", method="GET")
async def get_file(
self,
bucket: str,
key: str,
) -> FileResponse:
"""
Get a file info identified by a bucket and key.
:param bucket: Bucket name (valid chars: a-zA-Z0-9_-).
:param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.).
:returns: A FileResponse.
"""
...
@webmethod(route="/files/{bucket}/{key:path}", method="DELETE")
async def delete_file(
self,
bucket: str,
key: str,
) -> None:
"""
Delete a file identified by a bucket and key.
:param bucket: Bucket name (valid chars: a-zA-Z0-9_-).
:param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.).
:param file_id: The ID of the file to use for this request.
:returns: The raw file content as a binary response.
"""
...
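Because these routes mirror the OpenAI Files API under /openai/v1, the stock openai Python client can exercise them. A sketch, with the base URL, API key, and file name as assumptions:

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/openai/v1", api_key="none")  # assumed deployment

with open("dataset.jsonl", "rb") as f:  # hypothetical file
    uploaded = client.files.create(file=f, purpose="assistants")
print(client.files.retrieve(uploaded.id).filename)
print([item.id for item in client.files.list(purpose="assistants").data])
raw = client.files.content(uploaded.id).read()  # raw bytes of the stored file
client.files.delete(uploaded.id)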

View file

@@ -35,7 +35,8 @@ class StackRun(Subcommand):
"config",
type=str,
nargs="?", # Make it optional
help="Path to config file to use for the run. Required for venv and conda environments.",
metavar="config | template",
help="Path to config file to use for the run or name of known template (`llama stack list` for a list).",
)
self.parser.add_argument(
"--port",
@@ -59,7 +60,7 @@ class StackRun(Subcommand):
"--image-type",
type=str,
help="Image Type used during the build. This can be either conda or container or venv.",
choices=[e.value for e in ImageType],
choices=[e.value for e in ImageType if e.value != ImageType.CONTAINER.value],
)
self.parser.add_argument(
"--enable-ui",
@@ -154,7 +155,10 @@ class StackRun(Subcommand):
# func=<bound method StackRun._run_stack_run_cmd of <llama_stack.cli.stack.run.StackRun object at 0x10484b010>>
if callable(getattr(args, arg)):
continue
setattr(server_args, arg, getattr(args, arg))
if arg == "config" and template_name:
server_args.config = str(config_file)
else:
setattr(server_args, arg, getattr(args, arg))
# Run the server
server_main(server_args)

View file

@@ -1,86 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Any
from llama_stack.distribution.datatypes import AccessAttributes
from llama_stack.log import get_logger
logger = get_logger(__name__, category="core")
def check_access(
obj_identifier: str,
obj_attributes: AccessAttributes | None,
user_attributes: dict[str, Any] | None = None,
) -> bool:
"""Check if the current user has access to the given object, based on access attributes.
Access control algorithm:
1. If the resource has no access_attributes, access is GRANTED to all authenticated users
2. If the user has no attributes, access is DENIED to any object with access_attributes defined
3. For each attribute category in the resource's access_attributes:
a. If the user lacks that category, access is DENIED
b. If the user has the category but none of the required values, access is DENIED
c. If the user has at least one matching value in each required category, access is GRANTED
Example:
# Resource requires:
access_attributes = AccessAttributes(
roles=["admin", "data-scientist"],
teams=["ml-team"]
)
# User has:
user_attributes = {
"roles": ["data-scientist", "engineer"],
"teams": ["ml-team", "infra-team"],
"projects": ["llama-3"]
}
# Result: Access GRANTED
# - User has the "data-scientist" role (matches one of the required roles)
# - AND user is part of the "ml-team" (matches the required team)
# - The extra "projects" attribute is ignored
Args:
obj_identifier: The identifier of the resource object to check access for
obj_attributes: The access attributes of the resource object
user_attributes: The attributes of the current user
Returns:
bool: True if access is granted, False if denied
"""
# If object has no access attributes, allow access by default
if not obj_attributes:
return True
# If no user attributes, deny access to objects with access control
if not user_attributes:
return False
dict_attribs = obj_attributes.model_dump(exclude_none=True)
if not dict_attribs:
return True
# Check each attribute category (requires ALL categories to match)
# TODO: formalize this into a proper ABAC policy
for attr_key, required_values in dict_attribs.items():
user_values = user_attributes.get(attr_key, [])
if not user_values:
logger.debug(f"Access denied to {obj_identifier}: missing required attribute category '{attr_key}'")
return False
if not any(val in user_values for val in required_values):
logger.debug(
f"Access denied to {obj_identifier}: "
f"no match for attribute '{attr_key}', required one of {required_values}"
)
return False
logger.debug(f"Access granted to {obj_identifier}")
return True

View file

@@ -3,5 +3,3 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from .verification import get_distribution_template # noqa: F401

View file

@@ -0,0 +1,109 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Any
from llama_stack.distribution.datatypes import User
from .conditions import (
Condition,
ProtectedResource,
parse_conditions,
)
from .datatypes import (
AccessRule,
Action,
Scope,
)
def matches_resource(resource_scope: str, actual_resource: str) -> bool:
if resource_scope == actual_resource:
return True
return resource_scope.endswith("::*") and actual_resource.startswith(resource_scope[:-1])
def matches_scope(
scope: Scope,
action: Action,
resource: str,
user: str | None,
) -> bool:
if scope.resource and not matches_resource(scope.resource, resource):
return False
if scope.principal and scope.principal != user:
return False
return action in scope.actions
def as_list(obj: Any) -> list[Any]:
if isinstance(obj, list):
return obj
return [obj]
def matches_conditions(
conditions: list[Condition],
resource: ProtectedResource,
user: User,
) -> bool:
for condition in conditions:
# must match all conditions
if not condition.matches(resource, user):
return False
return True
def default_policy() -> list[AccessRule]:
# for backwards compatibility, if no rules are provided, assume
# full access subject to previous attribute matching rules
return [
AccessRule(
permit=Scope(actions=list(Action)),
when=["user in owners " + name for name in ["roles", "teams", "projects", "namespaces"]],
),
]
def is_action_allowed(
policy: list[AccessRule],
action: Action,
resource: ProtectedResource,
user: User | None,
) -> bool:
# If user is not set, assume authentication is not enabled
if not user:
return True
if not len(policy):
policy = default_policy()
qualified_resource_id = resource.type + "::" + resource.identifier
for rule in policy:
if rule.forbid and matches_scope(rule.forbid, action, qualified_resource_id, user.principal):
if rule.when:
if matches_conditions(parse_conditions(as_list(rule.when)), resource, user):
return False
elif rule.unless:
if not matches_conditions(parse_conditions(as_list(rule.unless)), resource, user):
return False
else:
return False
elif rule.permit and matches_scope(rule.permit, action, qualified_resource_id, user.principal):
if rule.when:
if matches_conditions(parse_conditions(as_list(rule.when)), resource, user):
return True
elif rule.unless:
if not matches_conditions(parse_conditions(as_list(rule.unless)), resource, user):
return True
else:
return True
# assume access is denied unless we find a rule that permits access
return False
class AccessDeniedError(RuntimeError):
pass
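To see how these pieces compose, here is a minimal sketch of evaluating a policy. The module paths follow the imports used elsewhere in this diff; the stand-in resource class is hypothetical, written to satisfy the ProtectedResource protocol:

from llama_stack.distribution.access_control.access_control import is_action_allowed
from llama_stack.distribution.access_control.datatypes import AccessRule, Action, Scope
from llama_stack.distribution.datatypes import User

policy = [
    AccessRule(
        permit=Scope(actions=[Action.READ], resource="model::*"),
        when="user with admin in roles",
    ),
]

class FakeModel:  # hypothetical stand-in satisfying ProtectedResource
    type = "model"
    identifier = "llama-3"
    owner = None

user = User(principal="alice", attributes={"roles": ["admin"]})
assert is_action_allowed(policy, Action.READ, FakeModel(), user)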

View file

@@ -0,0 +1,129 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Protocol
class User(Protocol):
principal: str
attributes: dict[str, list[str]] | None
class ProtectedResource(Protocol):
type: str
identifier: str
owner: User
class Condition(Protocol):
def matches(self, resource: ProtectedResource, user: User) -> bool: ...
class UserInOwnersList:
def __init__(self, name: str):
self.name = name
def owners_values(self, resource: ProtectedResource) -> list[str] | None:
if (
hasattr(resource, "owner")
and resource.owner
and resource.owner.attributes
and self.name in resource.owner.attributes
):
return resource.owner.attributes[self.name]
else:
return None
def matches(self, resource: ProtectedResource, user: User) -> bool:
required = self.owners_values(resource)
if not required:
return True
if not user.attributes or self.name not in user.attributes or not user.attributes[self.name]:
return False
user_values = user.attributes[self.name]
for value in required:
if value in user_values:
return True
return False
def __repr__(self):
return f"user in owners {self.name}"
class UserNotInOwnersList(UserInOwnersList):
def __init__(self, name: str):
super().__init__(name)
def matches(self, resource: ProtectedResource, user: User) -> bool:
return not super().matches(resource, user)
def __repr__(self):
return f"user not in owners {self.name}"
class UserWithValueInList:
def __init__(self, name: str, value: str):
self.name = name
self.value = value
def matches(self, resource: ProtectedResource, user: User) -> bool:
if user.attributes and self.name in user.attributes:
return self.value in user.attributes[self.name]
print(f"User does not have {self.value} in {self.name}")
return False
def __repr__(self):
return f"user with {self.value} in {self.name}"
class UserWithValueNotInList(UserWithValueInList):
def __init__(self, name: str, value: str):
super().__init__(name, value)
def matches(self, resource: ProtectedResource, user: User) -> bool:
return not super().matches(resource, user)
def __repr__(self):
return f"user with {self.value} not in {self.name}"
class UserIsOwner:
def matches(self, resource: ProtectedResource, user: User) -> bool:
return resource.owner.principal == user.principal if resource.owner else False
def __repr__(self):
return "user is owner"
class UserIsNotOwner:
def matches(self, resource: ProtectedResource, user: User) -> bool:
return not resource.owner or resource.owner.principal != user.principal
def __repr__(self):
return "user is not owner"
def parse_condition(condition: str) -> Condition:
words = condition.split()
match words:
case ["user", "is", "owner"]:
return UserIsOwner()
case ["user", "is", "not", "owner"]:
return UserIsNotOwner()
case ["user", "with", value, "in", name]:
return UserWithValueInList(name, value)
case ["user", "with", value, "not", "in", name]:
return UserWithValueNotInList(name, value)
case ["user", "in", "owners", name]:
return UserInOwnersList(name)
case ["user", "not", "in", "owners", name]:
return UserNotInOwnersList(name)
case _:
raise ValueError(f"Invalid condition: {condition}")
def parse_conditions(conditions: list[str]) -> list[Condition]:
return [parse_condition(c) for c in conditions]
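For example, each supported constraint string parses to one of the classes above:

parse_condition("user is owner")             # -> UserIsOwner()
parse_condition("user with admin in roles")  # -> UserWithValueInList("roles", "admin")
parse_condition("user in owners teams")      # -> UserInOwnersList("teams")
parse_condition("user likes pizza")          # raises ValueError("Invalid condition: ...")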

View file

@@ -0,0 +1,107 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from enum import Enum
from pydantic import BaseModel, model_validator
from typing_extensions import Self
from .conditions import parse_conditions
class Action(str, Enum):
CREATE = "create"
READ = "read"
UPDATE = "update"
DELETE = "delete"
class Scope(BaseModel):
principal: str | None = None
actions: Action | list[Action]
resource: str | None = None
def _mutually_exclusive(obj, a: str, b: str):
if getattr(obj, a) and getattr(obj, b):
raise ValueError(f"{a} and {b} are mutually exclusive")
def _require_one_of(obj, a: str, b: str):
if not getattr(obj, a) and not getattr(obj, b):
raise ValueError(f"on of {a} or {b} is required")
class AccessRule(BaseModel):
"""Access rule based loosely on cedar policy language
A rule defines a list of actions either to permit or to forbid. It may specify a
principal or a resource that must match for the rule to take effect. The resource
to match should be specified in the form of a type-qualified identifier, e.g.
model::my-model or vector_db::some-db, or a wildcard for all resources of a type,
e.g. model::*. If the principal or resource is not specified, the rule matches all
requests.
A rule may also specify a condition, either a 'when' or an 'unless', with additional
constraints as to where the rule applies. The constraints supported at present are:
- 'user with <attr-value> in <attr-name>'
- 'user with <attr-value> not in <attr-name>'
- 'user is owner'
- 'user is not owner'
- 'user in owners <attr-name>'
- 'user not in owners <attr-name>'
Rules are tested in order to find a match. If a match is found, the request is
permitted or forbidden depending on the type of rule. If no match is found, the
request is denied. If no rules are specified, a rule that allows any action as
long as the resource attributes match the user attributes is added
(i.e. the previous behaviour is the default).
Some examples in yaml:
- permit:
principal: user-1
actions: [create, read, delete]
resource: model::*
description: user-1 has full access to all models
- permit:
principal: user-2
actions: [read]
resource: model::model-1
description: user-2 has read access to model-1 only
- permit:
actions: [read]
when: user in owner teams
description: any user has read access to any resource created by a member of their team
- forbid:
actions: [create, read, delete]
resource: vector_db::*
unless: user with admin in roles
description: only user with admin role can use vector_db resources
"""
permit: Scope | None = None
forbid: Scope | None = None
when: str | list[str] | None = None
unless: str | list[str] | None = None
description: str | None = None
@model_validator(mode="after")
def validate_rule_format(self) -> Self:
_require_one_of(self, "permit", "forbid")
_mutually_exclusive(self, "permit", "forbid")
_mutually_exclusive(self, "when", "unless")
if isinstance(self.when, list):
parse_conditions(self.when)
elif self.when:
parse_conditions([self.when])
if isinstance(self.unless, list):
parse_conditions(self.unless)
elif self.unless:
parse_conditions([self.unless])
return self
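Since AccessRule is a pydantic model, the YAML examples above can be loaded directly; a sketch:

import yaml

rules = yaml.safe_load(
    """
- permit:
    actions: [read]
  when: user with admin in roles
"""
)
policy = [AccessRule(**rule) for rule in rules]  # validated by the model_validator above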

View file

@@ -29,6 +29,8 @@ SERVER_DEPENDENCIES = [
"fire",
"httpx",
"uvicorn",
"opentelemetry-sdk",
"opentelemetry-exporter-otlp-proto-http",
]

View file

@@ -24,6 +24,7 @@ from llama_stack.apis.shields import Shield, ShieldInput
from llama_stack.apis.tools import Tool, ToolGroup, ToolGroupInput, ToolRuntime
from llama_stack.apis.vector_dbs import VectorDB, VectorDBInput
from llama_stack.apis.vector_io import VectorIO
from llama_stack.distribution.access_control.datatypes import AccessRule
from llama_stack.providers.datatypes import Api, ProviderSpec
from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
from llama_stack.providers.utils.sqlstore.sqlstore import SqlStoreConfig
@@ -35,126 +36,66 @@ LLAMA_STACK_RUN_CONFIG_VERSION = "2"
RoutingKey = str | list[str]
class AccessAttributes(BaseModel):
"""Structured representation of user attributes for access control.
class User(BaseModel):
principal: str
# further attributes that may be used for access control decisions
attributes: dict[str, list[str]] | None = None
This model defines a structured approach to representing user attributes
with common standard categories for access control.
Standard attribute categories include:
- roles: Role-based attributes (e.g., admin, data-scientist)
- teams: Team-based attributes (e.g., ml-team, infra-team)
- projects: Project access attributes (e.g., llama-3, customer-insights)
- namespaces: Namespace-based access control for resource isolation
"""
# Standard attribute categories - the minimal set we need now
roles: list[str] | None = Field(
default=None, description="Role-based attributes (e.g., 'admin', 'data-scientist', 'user')"
)
teams: list[str] | None = Field(default=None, description="Team-based attributes (e.g., 'ml-team', 'nlp-team')")
projects: list[str] | None = Field(
default=None, description="Project-based access attributes (e.g., 'llama-3', 'customer-insights')"
)
namespaces: list[str] | None = Field(
default=None, description="Namespace-based access control for resource isolation"
)
def __init__(self, principal: str, attributes: dict[str, list[str]] | None):
super().__init__(principal=principal, attributes=attributes)
class ResourceWithACL(Resource):
"""Extension of Resource that adds attribute-based access control capabilities.
class ResourceWithOwner(Resource):
"""Extension of Resource that adds an optional owner, i.e. the user that created the
resource. This can be used to constrain access to the resource."""
This class adds an optional access_attributes field that allows fine-grained control
over which users can access each resource. When attributes are defined, a user must have
matching attributes to access the resource.
Attribute Matching Algorithm:
1. If a resource has no access_attributes (None or empty dict), it's visible to all authenticated users
2. Each key in access_attributes represents an attribute category (e.g., "roles", "teams", "projects")
3. The matching algorithm requires ALL categories to match (AND relationship between categories)
4. Within each category, ANY value match is sufficient (OR relationship within a category)
Examples:
# Resource visible to everyone (no access control)
model = Model(identifier="llama-2", ...)
# Resource visible only to admins
model = Model(
identifier="gpt-4",
access_attributes=AccessAttributes(roles=["admin"])
)
# Resource visible to data scientists on the ML team
model = Model(
identifier="private-model",
access_attributes=AccessAttributes(
roles=["data-scientist", "researcher"],
teams=["ml-team"]
)
)
# ^ User must have at least one of the roles AND be on the ml-team
# Resource visible to users with specific project access
vector_db = VectorDB(
identifier="customer-embeddings",
access_attributes=AccessAttributes(
projects=["customer-insights"],
namespaces=["confidential"]
)
)
# ^ User must have access to the customer-insights project AND have confidential namespace
"""
access_attributes: AccessAttributes | None = None
owner: User | None = None
# Use the extended Resource for all routable objects
class ModelWithACL(Model, ResourceWithACL):
class ModelWithOwner(Model, ResourceWithOwner):
pass
class ShieldWithACL(Shield, ResourceWithACL):
class ShieldWithOwner(Shield, ResourceWithOwner):
pass
class VectorDBWithACL(VectorDB, ResourceWithACL):
class VectorDBWithOwner(VectorDB, ResourceWithOwner):
pass
class DatasetWithACL(Dataset, ResourceWithACL):
class DatasetWithOwner(Dataset, ResourceWithOwner):
pass
class ScoringFnWithACL(ScoringFn, ResourceWithACL):
class ScoringFnWithOwner(ScoringFn, ResourceWithOwner):
pass
class BenchmarkWithACL(Benchmark, ResourceWithACL):
class BenchmarkWithOwner(Benchmark, ResourceWithOwner):
pass
class ToolWithACL(Tool, ResourceWithACL):
class ToolWithOwner(Tool, ResourceWithOwner):
pass
class ToolGroupWithACL(ToolGroup, ResourceWithACL):
class ToolGroupWithOwner(ToolGroup, ResourceWithOwner):
pass
RoutableObject = Model | Shield | VectorDB | Dataset | ScoringFn | Benchmark | Tool | ToolGroup
RoutableObjectWithProvider = Annotated[
ModelWithACL
| ShieldWithACL
| VectorDBWithACL
| DatasetWithACL
| ScoringFnWithACL
| BenchmarkWithACL
| ToolWithACL
| ToolGroupWithACL,
ModelWithOwner
| ShieldWithOwner
| VectorDBWithOwner
| DatasetWithOwner
| ScoringFnWithOwner
| BenchmarkWithOwner
| ToolWithOwner
| ToolGroupWithOwner,
Field(discriminator="type"),
]
@@ -234,6 +175,7 @@ class AuthenticationConfig(BaseModel):
...,
description="Provider-specific configuration",
)
access_policy: list[AccessRule] = Field(default=[], description="Rules for determining access to resources")
class AuthenticationRequiredError(Exception):

View file

@@ -10,6 +10,8 @@ import logging
from contextlib import AbstractContextManager
from typing import Any
from llama_stack.distribution.datatypes import User
from .utils.dynamic import instantiate_class_type
log = logging.getLogger(__name__)
@@ -21,12 +23,10 @@ PROVIDER_DATA_VAR = contextvars.ContextVar("provider_data", default=None)
class RequestProviderDataContext(AbstractContextManager):
"""Context manager for request provider data"""
def __init__(
self, provider_data: dict[str, Any] | None = None, auth_attributes: dict[str, list[str]] | None = None
):
def __init__(self, provider_data: dict[str, Any] | None = None, user: User | None = None):
self.provider_data = provider_data or {}
if auth_attributes:
self.provider_data["__auth_attributes"] = auth_attributes
if user:
self.provider_data["__authenticated_user"] = user
self.token = None
@@ -95,9 +95,9 @@ def request_provider_data_context(
return RequestProviderDataContext(provider_data, auth_attributes)
def get_auth_attributes() -> dict[str, list[str]] | None:
def get_authenticated_user() -> User | None:
"""Helper to retrieve auth attributes from the provider data context"""
provider_data = PROVIDER_DATA_VAR.get()
if not provider_data:
return None
return provider_data.get("__auth_attributes")
return provider_data.get("__authenticated_user")
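A sketch of the round trip through this context, based on the call site shown later in this diff (`request_provider_data_context(request.headers, user)`); passing empty headers is an assumption:

from llama_stack.distribution.datatypes import User
from llama_stack.distribution.request_headers import (
    get_authenticated_user,
    request_provider_data_context,
)

user = User(principal="alice", attributes={"roles": ["admin"]})
with request_provider_data_context({}, user):
    assert get_authenticated_user().principal == "alice"
assert get_authenticated_user() is None  # context variable restored on exit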

View file

@@ -28,6 +28,7 @@ from llama_stack.apis.vector_dbs import VectorDBs
from llama_stack.apis.vector_io import VectorIO
from llama_stack.distribution.client import get_client_impl
from llama_stack.distribution.datatypes import (
AccessRule,
AutoRoutedProviderSpec,
Provider,
RoutingTableProviderSpec,
@@ -118,6 +119,7 @@ async def resolve_impls(
run_config: StackRunConfig,
provider_registry: ProviderRegistry,
dist_registry: DistributionRegistry,
policy: list[AccessRule],
) -> dict[Api, Any]:
"""
Resolves provider implementations by:
@@ -140,7 +142,7 @@
sorted_providers = sort_providers_by_deps(providers_with_specs, run_config)
return await instantiate_providers(sorted_providers, router_apis, dist_registry, run_config)
return await instantiate_providers(sorted_providers, router_apis, dist_registry, run_config, policy)
def specs_for_autorouted_apis(apis_to_serve: list[str] | set[str]) -> dict[str, dict[str, ProviderWithSpec]]:
@@ -247,6 +249,7 @@ async def instantiate_providers(
router_apis: set[Api],
dist_registry: DistributionRegistry,
run_config: StackRunConfig,
policy: list[AccessRule],
) -> dict:
"""Instantiates providers asynchronously while managing dependencies."""
impls: dict[Api, Any] = {}
@@ -261,7 +264,7 @@
if isinstance(provider.spec, RoutingTableProviderSpec):
inner_impls = inner_impls_by_provider_id[f"inner-{provider.spec.router_api.value}"]
impl = await instantiate_provider(provider, deps, inner_impls, dist_registry, run_config)
impl = await instantiate_provider(provider, deps, inner_impls, dist_registry, run_config, policy)
if api_str.startswith("inner-"):
inner_impls_by_provider_id[api_str][provider.provider_id] = impl
@@ -312,6 +315,7 @@ async def instantiate_provider(
inner_impls: dict[str, Any],
dist_registry: DistributionRegistry,
run_config: StackRunConfig,
policy: list[AccessRule],
):
provider_spec = provider.spec
if not hasattr(provider_spec, "module"):
@@ -336,13 +340,15 @@
method = "get_routing_table_impl"
config = None
args = [provider_spec.api, inner_impls, deps, dist_registry]
args = [provider_spec.api, inner_impls, deps, dist_registry, policy]
else:
method = "get_provider_impl"
config_type = instantiate_class_type(provider_spec.config_class)
config = config_type(**provider.config)
args = [config, deps]
if "policy" in inspect.signature(getattr(module, method)).parameters:
args.append(policy)
fn = getattr(module, method)
impl = await fn(*args)
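The conditional injection above — appending `policy` only when the factory function declares it — is a small, reusable pattern. A generic sketch (the helper name is hypothetical):

import inspect

def call_with_optional_arg(fn, args: list, name: str, value):
    # Append `value` only if `fn` declares a parameter named `name`
    if name in inspect.signature(fn).parameters:
        args = [*args, value]
    return fn(*args)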

View file

@@ -6,7 +6,7 @@
from typing import Any
from llama_stack.distribution.datatypes import RoutedProtocol
from llama_stack.distribution.datatypes import AccessRule, RoutedProtocol
from llama_stack.distribution.stack import StackRunConfig
from llama_stack.distribution.store import DistributionRegistry
from llama_stack.providers.datatypes import Api, RoutingTable
@@ -18,6 +18,7 @@ async def get_routing_table_impl(
impls_by_provider_id: dict[str, RoutedProtocol],
_deps,
dist_registry: DistributionRegistry,
policy: list[AccessRule],
) -> Any:
from ..routing_tables.benchmarks import BenchmarksRoutingTable
from ..routing_tables.datasets import DatasetsRoutingTable
@@ -40,7 +41,7 @@
if api.value not in api_to_tables:
raise ValueError(f"API {api.value} not found in router map")
impl = api_to_tables[api.value](impls_by_provider_id, dist_registry)
impl = api_to_tables[api.value](impls_by_provider_id, dist_registry, policy)
await impl.initialize()
return impl

View file

@@ -8,7 +8,7 @@ from typing import Any
from llama_stack.apis.benchmarks import Benchmark, Benchmarks, ListBenchmarksResponse
from llama_stack.distribution.datatypes import (
BenchmarkWithACL,
BenchmarkWithOwner,
)
from llama_stack.log import get_logger
@@ -47,7 +47,7 @@ class BenchmarksRoutingTable(CommonRoutingTableImpl, Benchmarks):
)
if provider_benchmark_id is None:
provider_benchmark_id = benchmark_id
benchmark = BenchmarkWithACL(
benchmark = BenchmarkWithOwner(
identifier=benchmark_id,
dataset_id=dataset_id,
scoring_functions=scoring_functions,

View file

@@ -8,14 +8,14 @@ from typing import Any
from llama_stack.apis.resource import ResourceType
from llama_stack.apis.scoring_functions import ScoringFn
from llama_stack.distribution.access_control import check_access
from llama_stack.distribution.access_control.access_control import AccessDeniedError, is_action_allowed
from llama_stack.distribution.datatypes import (
AccessAttributes,
AccessRule,
RoutableObject,
RoutableObjectWithProvider,
RoutedProtocol,
)
from llama_stack.distribution.request_headers import get_auth_attributes
from llama_stack.distribution.request_headers import get_authenticated_user
from llama_stack.distribution.store import DistributionRegistry
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import Api, RoutingTable
@@ -73,9 +73,11 @@ class CommonRoutingTableImpl(RoutingTable):
self,
impls_by_provider_id: dict[str, RoutedProtocol],
dist_registry: DistributionRegistry,
policy: list[AccessRule],
) -> None:
self.impls_by_provider_id = impls_by_provider_id
self.dist_registry = dist_registry
self.policy = policy
async def initialize(self) -> None:
async def add_objects(objs: list[RoutableObjectWithProvider], provider_id: str, cls) -> None:
@@ -166,13 +168,15 @@ class CommonRoutingTableImpl(RoutingTable):
return None
# Check if user has permission to access this object
if not check_access(obj.identifier, getattr(obj, "access_attributes", None), get_auth_attributes()):
logger.debug(f"Access denied to {type} '{identifier}' based on attribute mismatch")
if not is_action_allowed(self.policy, "read", obj, get_authenticated_user()):
logger.debug(f"Access denied to {type} '{identifier}'")
return None
return obj
async def unregister_object(self, obj: RoutableObjectWithProvider) -> None:
if not is_action_allowed(self.policy, "delete", obj, get_authenticated_user()):
raise AccessDeniedError()
await self.dist_registry.delete(obj.type, obj.identifier)
await unregister_object_from_provider(obj, self.impls_by_provider_id[obj.provider_id])
@@ -187,11 +191,12 @@ class CommonRoutingTableImpl(RoutingTable):
p = self.impls_by_provider_id[obj.provider_id]
# If object supports access control but no attributes set, use creator's attributes
if not obj.access_attributes:
creator_attributes = get_auth_attributes()
if creator_attributes:
obj.access_attributes = AccessAttributes(**creator_attributes)
logger.info(f"Setting access attributes for {obj.type} '{obj.identifier}' based on creator's identity")
creator = get_authenticated_user()
if not is_action_allowed(self.policy, "create", obj, creator):
raise AccessDeniedError()
if creator:
obj.owner = creator
logger.info(f"Setting owner for {obj.type} '{obj.identifier}' to {obj.owner.principal}")
registered_obj = await register_object_with_provider(obj, p)
# TODO: This needs to be fixed for all APIs once they return the registered object
@@ -210,9 +215,7 @@ class CommonRoutingTableImpl(RoutingTable):
# Apply attribute-based access control filtering
if filtered_objs:
filtered_objs = [
obj
for obj in filtered_objs
if check_access(obj.identifier, getattr(obj, "access_attributes", None), get_auth_attributes())
obj for obj in filtered_objs if is_action_allowed(self.policy, "read", obj, get_authenticated_user())
]
return filtered_objs

View file

@@ -19,7 +19,7 @@ from llama_stack.apis.datasets import (
)
from llama_stack.apis.resource import ResourceType
from llama_stack.distribution.datatypes import (
DatasetWithACL,
DatasetWithOwner,
)
from llama_stack.log import get_logger
@@ -74,7 +74,7 @@ class DatasetsRoutingTable(CommonRoutingTableImpl, Datasets):
if metadata is None:
metadata = {}
dataset = DatasetWithACL(
dataset = DatasetWithOwner(
identifier=dataset_id,
provider_resource_id=provider_dataset_id,
provider_id=provider_id,

View file

@@ -9,7 +9,7 @@ from typing import Any
from llama_stack.apis.models import ListModelsResponse, Model, Models, ModelType, OpenAIListModelsResponse, OpenAIModel
from llama_stack.distribution.datatypes import (
ModelWithACL,
ModelWithOwner,
)
from llama_stack.log import get_logger
@@ -65,7 +65,7 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
model_type = ModelType.llm
if "embedding_dimension" not in metadata and model_type == ModelType.embedding:
raise ValueError("Embedding model must have an embedding dimension in its metadata")
model = ModelWithACL(
model = ModelWithOwner(
identifier=model_id,
provider_resource_id=provider_model_id,
provider_id=provider_id,

View file

@@ -13,7 +13,7 @@ from llama_stack.apis.scoring_functions import (
ScoringFunctions,
)
from llama_stack.distribution.datatypes import (
ScoringFnWithACL,
ScoringFnWithOwner,
)
from llama_stack.log import get_logger
@@ -50,7 +50,7 @@ class ScoringFunctionsRoutingTable(CommonRoutingTableImpl, ScoringFunctions):
raise ValueError(
"No provider specified and multiple providers available. Please specify a provider_id."
)
scoring_fn = ScoringFnWithACL(
scoring_fn = ScoringFnWithOwner(
identifier=scoring_fn_id,
description=description,
return_type=return_type,

View file

@@ -9,7 +9,7 @@ from typing import Any
from llama_stack.apis.resource import ResourceType
from llama_stack.apis.shields import ListShieldsResponse, Shield, Shields
from llama_stack.distribution.datatypes import (
ShieldWithACL,
ShieldWithOwner,
)
from llama_stack.log import get_logger
@@ -47,7 +47,7 @@ class ShieldsRoutingTable(CommonRoutingTableImpl, Shields):
)
if params is None:
params = {}
shield = ShieldWithACL(
shield = ShieldWithOwner(
identifier=shield_id,
provider_resource_id=provider_shield_id,
provider_id=provider_id,

View file

@@ -8,7 +8,7 @@ from typing import Any
from llama_stack.apis.common.content_types import URL
from llama_stack.apis.tools import ListToolGroupsResponse, ListToolsResponse, Tool, ToolGroup, ToolGroups
from llama_stack.distribution.datatypes import ToolGroupWithACL
from llama_stack.distribution.datatypes import ToolGroupWithOwner
from llama_stack.log import get_logger
from .common import CommonRoutingTableImpl
@@ -106,7 +106,7 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups):
mcp_endpoint: URL | None = None,
args: dict[str, Any] | None = None,
) -> None:
toolgroup = ToolGroupWithACL(
toolgroup = ToolGroupWithOwner(
identifier=toolgroup_id,
provider_id=provider_id,
provider_resource_id=toolgroup_id,

View file

@@ -10,7 +10,7 @@ from llama_stack.apis.models import ModelType
from llama_stack.apis.resource import ResourceType
from llama_stack.apis.vector_dbs import ListVectorDBsResponse, VectorDB, VectorDBs
from llama_stack.distribution.datatypes import (
VectorDBWithACL,
VectorDBWithOwner,
)
from llama_stack.log import get_logger
@@ -63,7 +63,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl, VectorDBs):
"embedding_model": embedding_model,
"embedding_dimension": model.metadata["embedding_dimension"],
}
vector_db = TypeAdapter(VectorDBWithACL).validate_python(vector_db_data)
vector_db = TypeAdapter(VectorDBWithOwner).validate_python(vector_db_data)
await self.register_object(vector_db)
return vector_db

View file

@@ -105,24 +105,16 @@ class AuthenticationMiddleware:
logger.exception("Error during authentication")
return await self._send_auth_error(send, "Authentication service error")
# Store attributes in request scope for access control
if validation_result.access_attributes:
user_attributes = validation_result.access_attributes.model_dump(exclude_none=True)
else:
logger.warning("No access attributes, setting namespace to token by default")
user_attributes = {
"roles": [token],
}
# Store the client ID in the request scope so that downstream middleware (like QuotaMiddleware)
# can identify the requester and enforce per-client rate limits.
scope["authenticated_client_id"] = token
# Store attributes in request scope
scope["user_attributes"] = user_attributes
scope["principal"] = validation_result.principal
if validation_result.attributes:
scope["user_attributes"] = validation_result.attributes
logger.debug(
f"Authentication successful: {validation_result.principal} with {len(scope['user_attributes'])} attributes"
f"Authentication successful: {validation_result.principal} with {len(validation_result.attributes)} attributes"
)
return await self.app(scope, receive, send)

View file

@@ -16,43 +16,18 @@ from jose import jwt
from pydantic import BaseModel, Field, field_validator, model_validator
from typing_extensions import Self
from llama_stack.distribution.datatypes import AccessAttributes, AuthenticationConfig, AuthProviderType
from llama_stack.distribution.datatypes import AuthenticationConfig, AuthProviderType, User
from llama_stack.log import get_logger
logger = get_logger(name=__name__, category="auth")
class TokenValidationResult(BaseModel):
principal: str | None = Field(
default=None,
description="The principal (username or persistent identifier) of the authenticated user",
)
access_attributes: AccessAttributes | None = Field(
default=None,
description="""
Structured user attributes for attribute-based access control.
These attributes determine which resources the user can access.
The model provides standard categories like "roles", "teams", "projects", and "namespaces".
Each attribute category contains a list of values that the user has for that category.
During access control checks, these values are compared against resource requirements.
Example with standard categories:
```json
{
"roles": ["admin", "data-scientist"],
"teams": ["ml-team"],
"projects": ["llama-3"],
"namespaces": ["research"]
}
```
""",
)
class AuthResponse(TokenValidationResult):
class AuthResponse(BaseModel):
"""The format of the authentication response from the auth endpoint."""
principal: str
# further attributes that may be used for access control decisions
attributes: dict[str, list[str]] | None = None
message: str | None = Field(
default=None, description="Optional message providing additional context about the authentication result."
)
@@ -78,7 +53,7 @@ class AuthProvider(ABC):
"""Abstract base class for authentication providers."""
@abstractmethod
async def validate_token(self, token: str, scope: dict | None = None) -> TokenValidationResult:
async def validate_token(self, token: str, scope: dict | None = None) -> User:
"""Validate a token and return access attributes."""
pass
@@ -88,10 +63,10 @@
pass
def get_attributes_from_claims(claims: dict[str, str], mapping: dict[str, str]) -> AccessAttributes:
attributes = AccessAttributes()
def get_attributes_from_claims(claims: dict[str, str], mapping: dict[str, str]) -> dict[str, list[str]]:
attributes: dict[str, list[str]] = {}
for claim_key, attribute_key in mapping.items():
if claim_key not in claims or not hasattr(attributes, attribute_key):
if claim_key not in claims:
continue
claim = claims[claim_key]
if isinstance(claim, list):
@@ -99,11 +74,10 @@ def get_attributes_from_claims(claims: dict[str, str], mapping: dict[str, str])
else:
values = claim.split()
current = getattr(attributes, attribute_key)
if current:
current.extend(values)
if attribute_key in attributes:
attributes[attribute_key].extend(values)
else:
setattr(attributes, attribute_key, values)
attributes[attribute_key] = values
return attributes
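Worked through by hand (the claim names and mapping below are assumptions), get_attributes_from_claims behaves like this: string claims are split on whitespace, list claims are taken as-is, and values for the same attribute key accumulate.

claims = {"sub": "alice", "scope": "read write", "groups": ["ml-team"]}
mapping = {"scope": "roles", "groups": "teams", "sub": "namespaces"}
attrs = get_attributes_from_claims(claims, mapping)
# attrs == {"roles": ["read", "write"], "teams": ["ml-team"], "namespaces": ["alice"]}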
@@ -145,8 +119,6 @@ class OAuth2TokenAuthProviderConfig(BaseModel):
for key, value in v.items():
if not value:
raise ValueError(f"claims_mapping value cannot be empty: {key}")
if value not in AccessAttributes.model_fields:
raise ValueError(f"claims_mapping value is not a valid attribute: {value}")
return v
@model_validator(mode="after")
@@ -171,14 +143,14 @@ class OAuth2TokenAuthProvider(AuthProvider):
self._jwks: dict[str, str] = {}
self._jwks_lock = Lock()
async def validate_token(self, token: str, scope: dict | None = None) -> TokenValidationResult:
async def validate_token(self, token: str, scope: dict | None = None) -> User:
if self.config.jwks:
return await self.validate_jwt_token(token, scope)
if self.config.introspection:
return await self.introspect_token(token, scope)
raise ValueError("One of jwks or introspection must be configured")
async def validate_jwt_token(self, token: str, scope: dict | None = None) -> TokenValidationResult:
async def validate_jwt_token(self, token: str, scope: dict | None = None) -> User:
"""Validate a token using the JWT token."""
await self._refresh_jwks()
@@ -203,12 +175,12 @@ class OAuth2TokenAuthProvider(AuthProvider):
# We should incorporate these into the access attributes.
principal = claims["sub"]
access_attributes = get_attributes_from_claims(claims, self.config.claims_mapping)
return TokenValidationResult(
return User(
principal=principal,
access_attributes=access_attributes,
attributes=access_attributes,
)
async def introspect_token(self, token: str, scope: dict | None = None) -> TokenValidationResult:
async def introspect_token(self, token: str, scope: dict | None = None) -> User:
"""Validate a token using token introspection as defined by RFC 7662."""
form = {
"token": token,
@@ -242,9 +214,9 @@ class OAuth2TokenAuthProvider(AuthProvider):
raise ValueError("Token not active")
principal = fields["sub"] or fields["username"]
access_attributes = get_attributes_from_claims(fields, self.config.claims_mapping)
return TokenValidationResult(
return User(
principal=principal,
access_attributes=access_attributes,
attributes=access_attributes,
)
except httpx.TimeoutException:
logger.exception("Token introspection request timed out")
@@ -299,7 +271,7 @@ class CustomAuthProvider(AuthProvider):
self.config = config
self._client = None
async def validate_token(self, token: str, scope: dict | None = None) -> TokenValidationResult:
async def validate_token(self, token: str, scope: dict | None = None) -> User:
"""Validate a token using the custom authentication endpoint."""
if scope is None:
scope = {}
@@ -341,7 +313,7 @@ class CustomAuthProvider(AuthProvider):
try:
response_data = response.json()
auth_response = AuthResponse(**response_data)
return auth_response
return User(auth_response.principal, auth_response.attributes)
except Exception as e:
logger.exception("Error parsing authentication response")
raise ValueError("Invalid authentication response format") from e

View file

@@ -18,7 +18,7 @@ from collections.abc import Callable
from contextlib import asynccontextmanager
from importlib.metadata import version as parse_version
from pathlib import Path
from typing import Annotated, Any
from typing import Annotated, Any, get_origin
import rich.pretty
import yaml
@@ -26,17 +26,13 @@ from aiohttp import hdrs
from fastapi import Body, FastAPI, HTTPException, Request
from fastapi import Path as FastapiPath
from fastapi.exceptions import RequestValidationError
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, StreamingResponse
from openai import BadRequestError
from pydantic import BaseModel, ValidationError
from llama_stack.distribution.datatypes import AuthenticationRequiredError, LoggingConfig, StackRunConfig
from llama_stack.distribution.distribution import builtin_automatically_routed_apis
from llama_stack.distribution.request_headers import (
PROVIDER_DATA_VAR,
request_provider_data_context,
)
from llama_stack.distribution.request_headers import PROVIDER_DATA_VAR, User, request_provider_data_context
from llama_stack.distribution.resolver import InvalidProviderError
from llama_stack.distribution.server.routes import (
find_matching_route,
@@ -217,11 +213,13 @@ def create_dynamic_typed_route(func: Any, method: str, route: str) -> Callable:
async def route_handler(request: Request, **kwargs):
# Get auth attributes from the request scope
user_attributes = request.scope.get("user_attributes", {})
principal = request.scope.get("principal", "")
user = User(principal, user_attributes)
await log_request_pre_validation(request)
# Use context manager with both provider data and auth attributes
with request_provider_data_context(request.headers, user_attributes):
with request_provider_data_context(request.headers, user):
is_streaming = is_streaming_request(func.__name__, request, **kwargs)
try:
@ -244,15 +242,23 @@ def create_dynamic_typed_route(func: Any, method: str, route: str) -> Callable:
path_params = extract_path_params(route)
if method == "post":
# Annotate parameters that are in the path with Path(...) and others with Body(...)
new_params = [new_params[0]] + [
(
param.replace(annotation=Annotated[param.annotation, FastapiPath(..., title=param.name)])
if param.name in path_params
else param.replace(annotation=Annotated[param.annotation, Body(..., embed=True)])
)
for param in new_params[1:]
]
# Annotate parameters that are in the path with Path(...) and others with Body(...),
# but preserve existing File() and Form() annotations for multipart form data
new_params = (
[new_params[0]]
+ [
(
param.replace(annotation=Annotated[param.annotation, FastapiPath(..., title=param.name)])
if param.name in path_params
else (
param # Keep original annotation if it's already an Annotated type
if get_origin(param.annotation) is Annotated
else param.replace(annotation=Annotated[param.annotation, Body(..., embed=True)])
)
)
for param in new_params[1:]
]
)
route_handler.__signature__ = sig.replace(parameters=new_params)
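
A minimal sketch (not part of this diff) of the get_origin check introduced above: parameters that already carry an Annotated wrapper, such as multipart File() uploads, are detected and left as-is instead of being re-wrapped in Body(...).

from typing import Annotated, get_origin

from fastapi import File, UploadFile

multipart_param = Annotated[UploadFile, File()]
plain_param = int

assert get_origin(multipart_param) is Annotated   # preserved unchanged
assert get_origin(plain_param) is not Annotated   # would get wrapped in Body(...)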
@ -472,17 +478,6 @@ def main(args: argparse.Namespace | None = None):
window_seconds=window_seconds,
)
# --- CORS middleware for local development ---
# TODO: move to reverse proxy
ui_port = os.environ.get("LLAMA_STACK_UI_PORT", 8322)
app.add_middleware(
CORSMiddleware,
allow_origins=[f"http://localhost:{ui_port}"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
try:
impls = asyncio.run(construct_stack(config))
except InvalidProviderError as e:

View file

@ -223,7 +223,10 @@ async def construct_stack(
run_config: StackRunConfig, provider_registry: ProviderRegistry | None = None
) -> dict[Api, Any]:
dist_registry, _ = await create_dist_registry(run_config.metadata_store, run_config.image_name)
impls = await resolve_impls(run_config, provider_registry or get_provider_registry(run_config), dist_registry)
policy = run_config.server.auth.access_policy if run_config.server.auth else []
impls = await resolve_impls(
run_config, provider_registry or get_provider_registry(run_config), dist_registry, policy
)
# Add internal implementations after all other providers are resolved
add_internal_implementations(impls, run_config)

View file

@ -7,10 +7,6 @@
# the root directory of this source tree.
CONTAINER_BINARY=${CONTAINER_BINARY:-docker}
CONTAINER_OPTS=${CONTAINER_OPTS:-}
LLAMA_CHECKPOINT_DIR=${LLAMA_CHECKPOINT_DIR:-}
LLAMA_STACK_DIR=${LLAMA_STACK_DIR:-}
TEST_PYPI_VERSION=${TEST_PYPI_VERSION:-}
PYPI_VERSION=${PYPI_VERSION:-}
VIRTUAL_ENV=${VIRTUAL_ENV:-}
@ -132,63 +128,7 @@ if [[ "$env_type" == "venv" || "$env_type" == "conda" ]]; then
$env_vars \
$other_args
elif [[ "$env_type" == "container" ]]; then
set -x
# Check if container command is available
if ! is_command_available $CONTAINER_BINARY; then
printf "${RED}Error: ${CONTAINER_BINARY} command not found. Is ${CONTAINER_BINARY} installed and in your PATH?${NC}" >&2
exit 1
fi
if is_command_available selinuxenabled &> /dev/null && selinuxenabled; then
# Disable SELinux labels
CONTAINER_OPTS="$CONTAINER_OPTS --security-opt label=disable"
fi
mounts=""
if [ -n "$LLAMA_STACK_DIR" ]; then
mounts="$mounts -v $(readlink -f $LLAMA_STACK_DIR):/app/llama-stack-source"
fi
if [ -n "$LLAMA_CHECKPOINT_DIR" ]; then
mounts="$mounts -v $LLAMA_CHECKPOINT_DIR:/root/.llama"
CONTAINER_OPTS="$CONTAINER_OPTS --gpus=all"
fi
if [ -n "$PYPI_VERSION" ]; then
version_tag="$PYPI_VERSION"
elif [ -n "$LLAMA_STACK_DIR" ]; then
version_tag="dev"
elif [ -n "$TEST_PYPI_VERSION" ]; then
version_tag="test-$TEST_PYPI_VERSION"
else
if ! is_command_available jq; then
echo -e "${RED}Error: jq not found" >&2
exit 1
fi
URL="https://pypi.org/pypi/llama-stack/json"
version_tag=$(curl -s $URL | jq -r '.info.version')
fi
# Build the command with optional yaml config
cmd="$CONTAINER_BINARY run $CONTAINER_OPTS -it \
-p $port:$port \
$env_vars \
$mounts \
--env LLAMA_STACK_PORT=$port \
--entrypoint python \
$container_image:$version_tag \
-m llama_stack.distribution.server.server"
# Add yaml config if provided, otherwise use default
if [ -n "$yaml_config" ]; then
cmd="$cmd -v $yaml_config:/app/run.yaml --config /app/run.yaml"
else
cmd="$cmd --config /app/run.yaml"
fi
# Add any other args
cmd="$cmd $other_args"
# Execute the command
eval $cmd
echo -e "${RED}Warning: Llama Stack no longer supports running Containers via the 'llama stack run' command.${NC}"
echo -e "Please refer to the documentation for more information: https://llama-stack.readthedocs.io/en/latest/distributions/building_distro.html#llama-stack-build"
exit 1
fi

View file

@ -23,11 +23,8 @@ from llama_stack.distribution.utils.image_types import LlamaStackImageType
def formulate_run_args(image_type, image_name, config, template_name) -> list:
env_name = ""
if image_type == LlamaStackImageType.CONTAINER.value:
env_name = (
f"distribution-{template_name}" if template_name else (config.container_image if config else image_name)
)
elif image_type == LlamaStackImageType.CONDA.value:
if image_type == LlamaStackImageType.CONDA.value:
current_conda_env = os.environ.get("CONDA_DEFAULT_ENV")
env_name = image_name or current_conda_env
if not env_name:

View file

@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import os
from collections.abc import Collection, Iterator, Sequence, Set
from logging import getLogger
from pathlib import Path
@ -14,7 +13,8 @@ from typing import (
)
import tiktoken
from tiktoken.load import load_tiktoken_bpe
from llama_stack.models.llama.tokenizer_utils import load_bpe_file
logger = getLogger(__name__)
@ -48,19 +48,20 @@ class Tokenizer:
global _INSTANCE
if _INSTANCE is None:
_INSTANCE = Tokenizer(os.path.join(os.path.dirname(__file__), "tokenizer.model"))
_INSTANCE = Tokenizer(Path(__file__).parent / "tokenizer.model")
return _INSTANCE
def __init__(self, model_path: str):
def __init__(self, model_path: Path):
"""
Initializes the Tokenizer with a Tiktoken model.
Args:
model_path (str): The path to the Tiktoken model file.
model_path (Path): The path to the Tiktoken model file.
"""
assert os.path.isfile(model_path), model_path
if not model_path.exists():
raise FileNotFoundError(f"Tokenizer model file not found: {model_path}")
mergeable_ranks = load_tiktoken_bpe(model_path)
mergeable_ranks = load_bpe_file(model_path)
num_base_tokens = len(mergeable_ranks)
special_tokens = [
"<|begin_of_text|>",
@ -83,7 +84,7 @@ class Tokenizer:
self.special_tokens = {token: num_base_tokens + i for i, token in enumerate(special_tokens)}
self.model = tiktoken.Encoding(
name=Path(model_path).name,
name=model_path.name,
pat_str=self.pat_str,
mergeable_ranks=mergeable_ranks,
special_tokens=self.special_tokens,

View file

@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import os
from collections.abc import Collection, Iterator, Sequence, Set
from logging import getLogger
from pathlib import Path
@ -14,7 +13,8 @@ from typing import (
)
import tiktoken
from tiktoken.load import load_tiktoken_bpe
from llama_stack.models.llama.tokenizer_utils import load_bpe_file
logger = getLogger(__name__)
@ -118,19 +118,20 @@ class Tokenizer:
global _INSTANCE
if _INSTANCE is None:
_INSTANCE = Tokenizer(os.path.join(os.path.dirname(__file__), "tokenizer.model"))
_INSTANCE = Tokenizer(Path(__file__).parent / "tokenizer.model")
return _INSTANCE
def __init__(self, model_path: str):
def __init__(self, model_path: Path):
"""
Initializes the Tokenizer with a Tiktoken model.
Args:
model_path (str): The path to the Tiktoken model file.
model_path (Path): The path to the Tiktoken model file.
"""
assert os.path.isfile(model_path), model_path
if not model_path.exists():
raise FileNotFoundError(f"Tokenizer model file not found: {model_path}")
mergeable_ranks = load_tiktoken_bpe(model_path)
mergeable_ranks = load_bpe_file(model_path)
num_base_tokens = len(mergeable_ranks)
special_tokens = BASIC_SPECIAL_TOKENS + LLAMA4_SPECIAL_TOKENS
@ -144,7 +145,7 @@ class Tokenizer:
self.special_tokens = {token: num_base_tokens + i for i, token in enumerate(special_tokens)}
self.model = tiktoken.Encoding(
name=Path(model_path).name,
name=model_path.name,
pat_str=self.O200K_PATTERN,
mergeable_ranks=mergeable_ranks,
special_tokens=self.special_tokens,

View file

@ -0,0 +1,40 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import base64
from pathlib import Path
from llama_stack.log import get_logger
logger = get_logger(__name__, "tokenizer_utils")
def load_bpe_file(model_path: Path) -> dict[bytes, int]:
"""
Load BPE file directly and return mergeable ranks.
Args:
model_path (Path): Path to the BPE model file.
Returns:
dict[bytes, int]: Dictionary mapping byte sequences to their ranks.
"""
mergeable_ranks = {}
with open(model_path, encoding="utf-8") as f:
content = f.read()
for line in content.splitlines():
if not line.strip(): # Skip empty lines
continue
try:
token, rank = line.split()
mergeable_ranks[base64.b64decode(token)] = int(rank)
except Exception as e:
logger.warning(f"Failed to parse line '{line}': {e}")
continue
return mergeable_ranks
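
A short sketch, not part of the diff, of the file format load_bpe_file expects: one base64-encoded token and its integer rank per line, whitespace-separated.

import base64
from pathlib import Path
from tempfile import TemporaryDirectory

with TemporaryDirectory() as tmp:
    bpe_path = Path(tmp) / "tokenizer.model"
    tokens = [b"hello", b"world"]
    bpe_path.write_text(
        "\n".join(f"{base64.b64encode(t).decode()} {i}" for i, t in enumerate(tokens)),
        encoding="utf-8",
    )
    ranks = load_bpe_file(bpe_path)
    assert ranks == {b"hello": 0, b"world": 1}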

View file

@ -6,12 +6,12 @@
from typing import Any
from llama_stack.distribution.datatypes import Api
from llama_stack.distribution.datatypes import AccessRule, Api
from .config import MetaReferenceAgentsImplConfig
async def get_provider_impl(config: MetaReferenceAgentsImplConfig, deps: dict[Api, Any]):
async def get_provider_impl(config: MetaReferenceAgentsImplConfig, deps: dict[Api, Any], policy: list[AccessRule]):
from .agents import MetaReferenceAgentsImpl
impl = MetaReferenceAgentsImpl(
@ -21,6 +21,7 @@ async def get_provider_impl(config: MetaReferenceAgentsImplConfig, deps: dict[Ap
deps[Api.safety],
deps[Api.tool_runtime],
deps[Api.tool_groups],
policy,
)
await impl.initialize()
return impl

View file

@ -60,6 +60,7 @@ from llama_stack.apis.inference import (
from llama_stack.apis.safety import Safety
from llama_stack.apis.tools import ToolGroups, ToolInvocationResult, ToolRuntime
from llama_stack.apis.vector_io import VectorIO
from llama_stack.distribution.datatypes import AccessRule
from llama_stack.log import get_logger
from llama_stack.models.llama.datatypes import (
BuiltinTool,
@ -96,13 +97,14 @@ class ChatAgent(ShieldRunnerMixin):
vector_io_api: VectorIO,
persistence_store: KVStore,
created_at: str,
policy: list[AccessRule],
):
self.agent_id = agent_id
self.agent_config = agent_config
self.inference_api = inference_api
self.safety_api = safety_api
self.vector_io_api = vector_io_api
self.storage = AgentPersistence(agent_id, persistence_store)
self.storage = AgentPersistence(agent_id, persistence_store, policy)
self.tool_runtime_api = tool_runtime_api
self.tool_groups_api = tool_groups_api
self.created_at = created_at

View file

@ -29,6 +29,7 @@ from llama_stack.apis.agents import (
Session,
Turn,
)
from llama_stack.apis.agents.openai_responses import OpenAIResponseText
from llama_stack.apis.common.responses import PaginatedResponse
from llama_stack.apis.inference import (
Inference,
@ -40,6 +41,7 @@ from llama_stack.apis.inference import (
from llama_stack.apis.safety import Safety
from llama_stack.apis.tools import ToolGroups, ToolRuntime
from llama_stack.apis.vector_io import VectorIO
from llama_stack.distribution.datatypes import AccessRule
from llama_stack.providers.utils.kvstore import InmemoryKVStoreImpl, kvstore_impl
from llama_stack.providers.utils.pagination import paginate_records
from llama_stack.providers.utils.responses.responses_store import ResponsesStore
@ -61,6 +63,7 @@ class MetaReferenceAgentsImpl(Agents):
safety_api: Safety,
tool_runtime_api: ToolRuntime,
tool_groups_api: ToolGroups,
policy: list[AccessRule],
):
self.config = config
self.inference_api = inference_api
@ -71,6 +74,7 @@ class MetaReferenceAgentsImpl(Agents):
self.in_memory_store = InmemoryKVStoreImpl()
self.openai_responses_impl: OpenAIResponsesImpl | None = None
self.policy = policy
async def initialize(self) -> None:
self.persistence_store = await kvstore_impl(self.config.persistence_store)
@ -129,6 +133,7 @@ class MetaReferenceAgentsImpl(Agents):
self.persistence_store if agent_info.enable_session_persistence else self.in_memory_store
),
created_at=agent_info.created_at,
policy=self.policy,
)
async def create_agent_session(
@ -324,10 +329,12 @@ class MetaReferenceAgentsImpl(Agents):
store: bool | None = True,
stream: bool | None = False,
temperature: float | None = None,
text: OpenAIResponseText | None = None,
tools: list[OpenAIResponseInputTool] | None = None,
max_infer_iters: int | None = 10,
) -> OpenAIResponseObject:
return await self.openai_responses_impl.create_openai_response(
input, model, instructions, previous_response_id, store, stream, temperature, tools
input, model, instructions, previous_response_id, store, stream, temperature, text, tools, max_infer_iters
)
async def list_openai_responses(

View file

@ -37,6 +37,8 @@ from llama_stack.apis.agents.openai_responses import (
OpenAIResponseOutputMessageFunctionToolCall,
OpenAIResponseOutputMessageMCPListTools,
OpenAIResponseOutputMessageWebSearchToolCall,
OpenAIResponseText,
OpenAIResponseTextFormat,
)
from llama_stack.apis.inference.inference import (
Inference,
@ -50,7 +52,12 @@ from llama_stack.apis.inference.inference import (
OpenAIChoice,
OpenAIDeveloperMessageParam,
OpenAIImageURL,
OpenAIJSONSchema,
OpenAIMessageParam,
OpenAIResponseFormatJSONObject,
OpenAIResponseFormatJSONSchema,
OpenAIResponseFormatParam,
OpenAIResponseFormatText,
OpenAISystemMessageParam,
OpenAIToolMessageParam,
OpenAIUserMessageParam,
@ -158,6 +165,21 @@ async def _convert_chat_choice_to_response_message(choice: OpenAIChoice) -> Open
)
async def _convert_response_text_to_chat_response_format(text: OpenAIResponseText) -> OpenAIResponseFormatParam:
"""
Convert an OpenAI Response text parameter into an OpenAI Chat Completion response format.
"""
if not text.format or text.format["type"] == "text":
return OpenAIResponseFormatText(type="text")
if text.format["type"] == "json_object":
return OpenAIResponseFormatJSONObject()
if text.format["type"] == "json_schema":
return OpenAIResponseFormatJSONSchema(
json_schema=OpenAIJSONSchema(name=text.format["name"], schema=text.format["schema"])
)
raise ValueError(f"Unsupported text format: {text.format}")
async def _get_message_type_by_role(role: str):
role_to_type = {
"user": OpenAIUserMessageParam,
@ -180,6 +202,7 @@ class ChatCompletionContext(BaseModel):
mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP]
stream: bool
temperature: float | None
response_format: OpenAIResponseFormatParam
class OpenAIResponsesImpl:
@ -258,6 +281,18 @@ class OpenAIResponsesImpl:
"""
return await self.responses_store.list_response_input_items(response_id, after, before, include, limit, order)
def _is_function_tool_call(
self,
tool_call: OpenAIChatCompletionToolCall,
tools: list[OpenAIResponseInputTool],
) -> bool:
if not tool_call.function:
return False
for t in tools:
if t.type == "function" and t.name == tool_call.function.name:
return True
return False
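
A structural sketch of the rule with stand-in dataclasses (not the stack's own types): a tool call counts as a "function" call only when the client registered a function tool with a matching name; anything else is executed server-side by the tool loop.

from dataclasses import dataclass

@dataclass
class _Fn:
    name: str

@dataclass
class _ToolCall:
    function: _Fn | None

@dataclass
class _Tool:
    type: str
    name: str

def _is_function(tool_call: _ToolCall, tools: list[_Tool]) -> bool:
    if not tool_call.function:
        return False
    return any(t.type == "function" and t.name == tool_call.function.name for t in tools)

assert _is_function(_ToolCall(_Fn("get_weather")), [_Tool("function", "get_weather")])
assert not _is_function(_ToolCall(_Fn("web_search_call")), [_Tool("function", "get_weather")])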
async def _process_response_choices(
self,
chat_response: OpenAIChatCompletion,
@ -270,7 +305,7 @@ class OpenAIResponsesImpl:
for choice in chat_response.choices:
if choice.message.tool_calls and tools:
# Assume if the first tool is a function, all tools are functions
if tools[0].type == "function":
if self._is_function_tool_call(choice.message.tool_calls[0], tools):
for tool_call in choice.message.tool_calls:
output_messages.append(
OpenAIResponseOutputMessageFunctionToolCall(
@ -331,9 +366,12 @@ class OpenAIResponsesImpl:
store: bool | None = True,
stream: bool | None = False,
temperature: float | None = None,
text: OpenAIResponseText | None = None,
tools: list[OpenAIResponseInputTool] | None = None,
max_infer_iters: int | None = 10,
):
stream = False if stream is None else stream
text = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")) if text is None else text
output_messages: list[OpenAIResponseOutput] = []
@ -342,6 +380,9 @@ class OpenAIResponsesImpl:
messages = await _convert_response_input_to_chat_messages(input)
await self._prepend_instructions(messages, instructions)
# Structured outputs
response_format = await _convert_response_text_to_chat_response_format(text)
# Tool setup
chat_tools, mcp_tool_to_server, mcp_list_message = (
await self._convert_response_tools_to_chat_tools(tools) if tools else (None, {}, None)
@ -356,65 +397,111 @@ class OpenAIResponsesImpl:
mcp_tool_to_server=mcp_tool_to_server,
stream=stream,
temperature=temperature,
response_format=response_format,
)
inference_result = await self.inference_api.openai_chat_completion(
model=model,
messages=messages,
tools=chat_tools,
stream=stream,
temperature=temperature,
)
# Fork to streaming vs non-streaming - let each handle ALL inference rounds
if stream:
return self._create_streaming_response(
inference_result=inference_result,
ctx=ctx,
output_messages=output_messages,
input=input,
model=model,
store=store,
text=text,
tools=tools,
max_infer_iters=max_infer_iters,
)
else:
return await self._create_non_streaming_response(
inference_result=inference_result,
ctx=ctx,
output_messages=output_messages,
input=input,
model=model,
store=store,
text=text,
tools=tools,
max_infer_iters=max_infer_iters,
)
async def _create_non_streaming_response(
self,
inference_result: Any,
ctx: ChatCompletionContext,
output_messages: list[OpenAIResponseOutput],
input: str | list[OpenAIResponseInput],
model: str,
store: bool | None,
text: OpenAIResponseText,
tools: list[OpenAIResponseInputTool] | None,
max_infer_iters: int,
) -> OpenAIResponseObject:
chat_response = OpenAIChatCompletion(**inference_result.model_dump())
n_iter = 0
messages = ctx.messages.copy()
# Process response choices (tool execution and message creation)
output_messages.extend(
await self._process_response_choices(
chat_response=chat_response,
ctx=ctx,
tools=tools,
while True:
# Do inference (including the first one)
inference_result = await self.inference_api.openai_chat_completion(
model=ctx.model,
messages=messages,
tools=ctx.tools,
stream=False,
temperature=ctx.temperature,
response_format=ctx.response_format,
)
)
completion = OpenAIChatCompletion(**inference_result.model_dump())
# Separate function vs non-function tool calls
function_tool_calls = []
non_function_tool_calls = []
for choice in completion.choices:
if choice.message.tool_calls and tools:
for tool_call in choice.message.tool_calls:
if self._is_function_tool_call(tool_call, tools):
function_tool_calls.append(tool_call)
else:
non_function_tool_calls.append(tool_call)
# Process response choices based on tool call types
if function_tool_calls:
# For function tool calls, use existing logic and break out of the loop
current_output_messages = await self._process_response_choices(
chat_response=completion,
ctx=ctx,
tools=tools,
)
output_messages.extend(current_output_messages)
break
elif non_function_tool_calls:
# For non-function tool calls, execute them and continue loop
for choice in completion.choices:
tool_outputs, tool_response_messages = await self._execute_tool_calls_only(choice, ctx)
output_messages.extend(tool_outputs)
# Add assistant message and tool responses to messages for next iteration
messages.append(choice.message)
messages.extend(tool_response_messages)
n_iter += 1
if n_iter >= max_infer_iters:
break
# Continue with next iteration of the loop
continue
else:
# No tool calls - convert response to message and we're done
for choice in completion.choices:
output_messages.append(await _convert_chat_choice_to_response_message(choice))
break
response = OpenAIResponseObject(
created_at=chat_response.created,
created_at=completion.created,
id=f"resp-{uuid.uuid4()}",
model=model,
object="response",
status="completed",
output=output_messages,
text=text,
)
logger.debug(f"OpenAI Responses response: {response}")
@ -429,13 +516,14 @@ class OpenAIResponsesImpl:
async def _create_streaming_response(
self,
inference_result: Any,
ctx: ChatCompletionContext,
output_messages: list[OpenAIResponseOutput],
input: str | list[OpenAIResponseInput],
model: str,
store: bool | None,
text: OpenAIResponseText,
tools: list[OpenAIResponseInputTool] | None,
max_infer_iters: int | None,
) -> AsyncIterator[OpenAIResponseObjectStream]:
# Create initial response and emit response.created immediately
response_id = f"resp-{uuid.uuid4()}"
@ -448,92 +536,141 @@ class OpenAIResponsesImpl:
object="response",
status="in_progress",
output=output_messages.copy(),
text=text,
)
# Emit response.created immediately
yield OpenAIResponseObjectStreamResponseCreated(response=initial_response)
# For streaming, inference_result is an async iterator of chunks
# Stream chunks and emit delta events as they arrive
chat_response_id = ""
chat_response_content = []
chat_response_tool_calls: dict[int, OpenAIChatCompletionToolCall] = {}
chunk_created = 0
chunk_model = ""
chunk_finish_reason = ""
sequence_number = 0
# Implement tool execution loop for streaming - handle ALL inference rounds including the first
n_iter = 0
messages = ctx.messages.copy()
# Create a placeholder message item for delta events
message_item_id = f"msg_{uuid.uuid4()}"
async for chunk in inference_result:
chat_response_id = chunk.id
chunk_created = chunk.created
chunk_model = chunk.model
for chunk_choice in chunk.choices:
# Emit incremental text content as delta events
if chunk_choice.delta.content:
sequence_number += 1
yield OpenAIResponseObjectStreamResponseOutputTextDelta(
content_index=0,
delta=chunk_choice.delta.content,
item_id=message_item_id,
output_index=0,
sequence_number=sequence_number,
)
# Collect content for final response
chat_response_content.append(chunk_choice.delta.content or "")
if chunk_choice.finish_reason:
chunk_finish_reason = chunk_choice.finish_reason
# Aggregate tool call arguments across chunks, using their index as the aggregation key
if chunk_choice.delta.tool_calls:
for tool_call in chunk_choice.delta.tool_calls:
response_tool_call = chat_response_tool_calls.get(tool_call.index, None)
if response_tool_call:
# Don't attempt to concatenate arguments if we don't have any new arguments
if tool_call.function.arguments:
# Guard against an initial None argument before we concatenate
response_tool_call.function.arguments = (
response_tool_call.function.arguments or ""
) + tool_call.function.arguments
else:
tool_call_dict: dict[str, Any] = tool_call.model_dump()
tool_call_dict.pop("type", None)
response_tool_call = OpenAIChatCompletionToolCall(**tool_call_dict)
chat_response_tool_calls[tool_call.index] = response_tool_call
# Convert collected chunks to complete response
if chat_response_tool_calls:
tool_calls = [chat_response_tool_calls[i] for i in sorted(chat_response_tool_calls.keys())]
else:
tool_calls = None
assistant_message = OpenAIAssistantMessageParam(
content="".join(chat_response_content),
tool_calls=tool_calls,
)
chat_response_obj = OpenAIChatCompletion(
id=chat_response_id,
choices=[
OpenAIChoice(
message=assistant_message,
finish_reason=chunk_finish_reason,
index=0,
)
],
created=chunk_created,
model=chunk_model,
)
# Process response choices (tool execution and message creation)
output_messages.extend(
await self._process_response_choices(
chat_response=chat_response_obj,
ctx=ctx,
tools=tools,
while True:
current_inference_result = await self.inference_api.openai_chat_completion(
model=ctx.model,
messages=messages,
tools=ctx.tools,
stream=True,
temperature=ctx.temperature,
response_format=ctx.response_format,
)
)
# Process streaming chunks and build complete response
chat_response_id = ""
chat_response_content = []
chat_response_tool_calls: dict[int, OpenAIChatCompletionToolCall] = {}
chunk_created = 0
chunk_model = ""
chunk_finish_reason = ""
sequence_number = 0
# Create a placeholder message item for delta events
message_item_id = f"msg_{uuid.uuid4()}"
async for chunk in current_inference_result:
chat_response_id = chunk.id
chunk_created = chunk.created
chunk_model = chunk.model
for chunk_choice in chunk.choices:
# Emit incremental text content as delta events
if chunk_choice.delta.content:
sequence_number += 1
yield OpenAIResponseObjectStreamResponseOutputTextDelta(
content_index=0,
delta=chunk_choice.delta.content,
item_id=message_item_id,
output_index=0,
sequence_number=sequence_number,
)
# Collect content for final response
chat_response_content.append(chunk_choice.delta.content or "")
if chunk_choice.finish_reason:
chunk_finish_reason = chunk_choice.finish_reason
# Aggregate tool call arguments across chunks
if chunk_choice.delta.tool_calls:
for tool_call in chunk_choice.delta.tool_calls:
response_tool_call = chat_response_tool_calls.get(tool_call.index, None)
if response_tool_call:
# Don't attempt to concatenate arguments if we don't have any new arguments
if tool_call.function.arguments:
# Guard against an initial None argument before we concatenate
response_tool_call.function.arguments = (
response_tool_call.function.arguments or ""
) + tool_call.function.arguments
else:
tool_call_dict: dict[str, Any] = tool_call.model_dump()
tool_call_dict.pop("type", None)
response_tool_call = OpenAIChatCompletionToolCall(**tool_call_dict)
chat_response_tool_calls[tool_call.index] = response_tool_call
# Convert collected chunks to complete response
if chat_response_tool_calls:
tool_calls = [chat_response_tool_calls[i] for i in sorted(chat_response_tool_calls.keys())]
else:
tool_calls = None
assistant_message = OpenAIAssistantMessageParam(
content="".join(chat_response_content),
tool_calls=tool_calls,
)
current_response = OpenAIChatCompletion(
id=chat_response_id,
choices=[
OpenAIChoice(
message=assistant_message,
finish_reason=chunk_finish_reason,
index=0,
)
],
created=chunk_created,
model=chunk_model,
)
# Separate function vs non-function tool calls
function_tool_calls = []
non_function_tool_calls = []
for choice in current_response.choices:
if choice.message.tool_calls and tools:
for tool_call in choice.message.tool_calls:
if self._is_function_tool_call(tool_call, tools):
function_tool_calls.append(tool_call)
else:
non_function_tool_calls.append(tool_call)
# Process response choices based on tool call types
if function_tool_calls:
# For function tool calls, use existing logic and break
current_output_messages = await self._process_response_choices(
chat_response=current_response,
ctx=ctx,
tools=tools,
)
output_messages.extend(current_output_messages)
break
elif non_function_tool_calls:
# For non-function tool calls, execute them and continue loop
for choice in current_response.choices:
tool_outputs, tool_response_messages = await self._execute_tool_calls_only(choice, ctx)
output_messages.extend(tool_outputs)
# Add assistant message and tool responses to messages for next iteration
messages.append(choice.message)
messages.extend(tool_response_messages)
n_iter += 1
if n_iter >= (max_infer_iters or 10):
break
# Continue with next iteration of the loop
continue
else:
# No tool calls - convert response to message and we're done
for choice in current_response.choices:
output_messages.append(await _convert_chat_choice_to_response_message(choice))
break
# Create final response
final_response = OpenAIResponseObject(
@ -542,6 +679,7 @@ class OpenAIResponsesImpl:
model=model,
object="response",
status="completed",
text=text,
output=output_messages,
)
@ -646,6 +784,30 @@ class OpenAIResponsesImpl:
raise ValueError(f"Llama Stack OpenAI Responses does not yet support tool type: {input_tool.type}")
return chat_tools, mcp_tool_to_server, mcp_list_message
async def _execute_tool_calls_only(
self,
choice: OpenAIChoice,
ctx: ChatCompletionContext,
) -> tuple[list[OpenAIResponseOutput], list[OpenAIMessageParam]]:
"""Execute tool calls and return output messages and tool response messages for next inference."""
output_messages: list[OpenAIResponseOutput] = []
tool_response_messages: list[OpenAIMessageParam] = []
if not isinstance(choice.message, OpenAIAssistantMessageParam):
return output_messages, tool_response_messages
if not choice.message.tool_calls:
return output_messages, tool_response_messages
for tool_call in choice.message.tool_calls:
tool_call_log, further_input = await self._execute_tool_call(tool_call, ctx)
if tool_call_log:
output_messages.append(tool_call_log)
if further_input:
tool_response_messages.append(further_input)
return output_messages, tool_response_messages
async def _execute_tool_and_return_final_output(
self,
choice: OpenAIChoice,
@ -772,5 +934,8 @@ class OpenAIResponsesImpl:
else:
raise ValueError(f"Unknown result content type: {type(result.content)}")
input_message = OpenAIToolMessageParam(content=content, tool_call_id=tool_call_id)
else:
text = str(error_exc)
input_message = OpenAIToolMessageParam(content=text, tool_call_id=tool_call_id)
return message, input_message

View file

@ -10,9 +10,10 @@ import uuid
from datetime import datetime, timezone
from llama_stack.apis.agents import AgentConfig, Session, ToolExecutionStep, Turn
from llama_stack.distribution.access_control import check_access
from llama_stack.distribution.datatypes import AccessAttributes
from llama_stack.distribution.request_headers import get_auth_attributes
from llama_stack.distribution.access_control.access_control import AccessDeniedError, is_action_allowed
from llama_stack.distribution.access_control.datatypes import AccessRule
from llama_stack.distribution.datatypes import User
from llama_stack.distribution.request_headers import get_authenticated_user
from llama_stack.providers.utils.kvstore import KVStore
log = logging.getLogger(__name__)
@ -22,7 +23,9 @@ class AgentSessionInfo(Session):
# TODO: is this used anywhere?
vector_db_id: str | None = None
started_at: datetime
access_attributes: AccessAttributes | None = None
owner: User | None = None
identifier: str | None = None
type: str = "session"
class AgentInfo(AgentConfig):
@ -30,24 +33,27 @@ class AgentInfo(AgentConfig):
class AgentPersistence:
def __init__(self, agent_id: str, kvstore: KVStore):
def __init__(self, agent_id: str, kvstore: KVStore, policy: list[AccessRule]):
self.agent_id = agent_id
self.kvstore = kvstore
self.policy = policy
async def create_session(self, name: str) -> str:
session_id = str(uuid.uuid4())
# Get current user's auth attributes for new sessions
auth_attributes = get_auth_attributes()
access_attributes = AccessAttributes(**auth_attributes) if auth_attributes else None
user = get_authenticated_user()
session_info = AgentSessionInfo(
session_id=session_id,
session_name=name,
started_at=datetime.now(timezone.utc),
access_attributes=access_attributes,
owner=user,
turns=[],
identifier=name, # should this be qualified in any way?
)
if not is_action_allowed(self.policy, "create", session_info, user):
raise AccessDeniedError()
await self.kvstore.set(
key=f"session:{self.agent_id}:{session_id}",
@ -73,10 +79,10 @@ class AgentPersistence:
def _check_session_access(self, session_info: AgentSessionInfo) -> bool:
"""Check if current user has access to the session."""
# Handle backward compatibility for old sessions without access control
if not hasattr(session_info, "access_attributes"):
if not hasattr(session_info, "access_attributes") and not hasattr(session_info, "owner"):
return True
return check_access(session_info.session_id, session_info.access_attributes, get_auth_attributes())
return is_action_allowed(self.policy, "read", session_info, get_authenticated_user())
async def get_session_if_accessible(self, session_id: str) -> AgentSessionInfo | None:
"""Get session info if the user has access to it. For internal use by sub-session methods."""

View file

@ -0,0 +1,20 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Any
from llama_stack.distribution.datatypes import Api
from .config import LocalfsFilesImplConfig
from .files import LocalfsFilesImpl
__all__ = ["LocalfsFilesImpl", "LocalfsFilesImplConfig"]
async def get_provider_impl(config: LocalfsFilesImplConfig, deps: dict[Api, Any]):
impl = LocalfsFilesImpl(config)
await impl.initialize()
return impl

View file

@ -0,0 +1,31 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Any
from pydantic import BaseModel, Field
from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig
class LocalfsFilesImplConfig(BaseModel):
storage_dir: str = Field(
description="Directory to store uploaded files",
)
metadata_store: SqlStoreConfig = Field(
description="SQL store configuration for file metadata",
)
ttl_secs: int = 365 * 24 * 60 * 60 # 1 year
@classmethod
def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
return {
"storage_dir": "${env.FILES_STORAGE_DIR:" + __distro_dir__ + "/files}",
"metadata_store": SqliteSqlStoreConfig.sample_run_config(
__distro_dir__=__distro_dir__,
db_name="files_metadata.db",
),
}
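
As a sketch, rendering this sample config for a hypothetical distro directory yields the env-substitution strings that the run.yaml templates later in this diff embed verbatim:

cfg = LocalfsFilesImplConfig.sample_run_config(__distro_dir__="~/.llama/distributions/demo")
# cfg["storage_dir"] == "${env.FILES_STORAGE_DIR:~/.llama/distributions/demo/files}"
# cfg["metadata_store"] points at files_metadata.db under the same directory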

View file

@ -0,0 +1,214 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import time
import uuid
from pathlib import Path
from typing import Annotated
from fastapi import File, Form, Response, UploadFile
from llama_stack.apis.common.responses import Order
from llama_stack.apis.files import (
Files,
ListOpenAIFileResponse,
OpenAIFileDeleteResponse,
OpenAIFileObject,
OpenAIFilePurpose,
)
from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
from llama_stack.providers.utils.sqlstore.sqlstore import SqlStore, sqlstore_impl
from .config import LocalfsFilesImplConfig
class LocalfsFilesImpl(Files):
def __init__(self, config: LocalfsFilesImplConfig) -> None:
self.config = config
self.sql_store: SqlStore | None = None
async def initialize(self) -> None:
"""Initialize the files provider by setting up storage directory and metadata database."""
# Create storage directory if it doesn't exist
storage_path = Path(self.config.storage_dir)
storage_path.mkdir(parents=True, exist_ok=True)
# Initialize SQL store for metadata
self.sql_store = sqlstore_impl(self.config.metadata_store)
await self.sql_store.create_table(
"openai_files",
{
"id": ColumnDefinition(type=ColumnType.STRING, primary_key=True),
"filename": ColumnType.STRING,
"purpose": ColumnType.STRING,
"bytes": ColumnType.INTEGER,
"created_at": ColumnType.INTEGER,
"expires_at": ColumnType.INTEGER,
"file_path": ColumnType.STRING, # Path to actual file on disk
},
)
def _generate_file_id(self) -> str:
"""Generate a unique file ID for OpenAI API."""
return f"file-{uuid.uuid4().hex}"
def _get_file_path(self, file_id: str) -> Path:
"""Get the filesystem path for a file ID."""
return Path(self.config.storage_dir) / file_id
# OpenAI Files API Implementation
async def openai_upload_file(
self,
file: Annotated[UploadFile, File()],
purpose: Annotated[OpenAIFilePurpose, Form()],
) -> OpenAIFileObject:
"""Upload a file that can be used across various endpoints."""
if not self.sql_store:
raise RuntimeError("Files provider not initialized")
file_id = self._generate_file_id()
file_path = self._get_file_path(file_id)
content = await file.read()
file_size = len(content)
with open(file_path, "wb") as f:
f.write(content)
created_at = int(time.time())
expires_at = created_at + self.config.ttl_secs
await self.sql_store.insert(
"openai_files",
{
"id": file_id,
"filename": file.filename or "uploaded_file",
"purpose": purpose.value,
"bytes": file_size,
"created_at": created_at,
"expires_at": expires_at,
"file_path": file_path.as_posix(),
},
)
return OpenAIFileObject(
id=file_id,
filename=file.filename or "uploaded_file",
purpose=purpose,
bytes=file_size,
created_at=created_at,
expires_at=expires_at,
)
async def openai_list_files(
self,
after: str | None = None,
limit: int | None = 10000,
order: Order | None = Order.desc,
purpose: OpenAIFilePurpose | None = None,
) -> ListOpenAIFileResponse:
"""Returns a list of files that belong to the user's organization."""
if not self.sql_store:
raise RuntimeError("Files provider not initialized")
# TODO: Implement 'after' pagination properly
if after:
raise NotImplementedError("After pagination not yet implemented")
where = None
if purpose:
where = {"purpose": purpose.value}
rows = await self.sql_store.fetch_all(
"openai_files",
where=where,
order_by=[("created_at", order.value if order else Order.desc.value)],
limit=limit,
)
files = [
OpenAIFileObject(
id=row["id"],
filename=row["filename"],
purpose=OpenAIFilePurpose(row["purpose"]),
bytes=row["bytes"],
created_at=row["created_at"],
expires_at=row["expires_at"],
)
for row in rows
]
return ListOpenAIFileResponse(
data=files,
has_more=False, # TODO: Implement proper pagination
first_id=files[0].id if files else "",
last_id=files[-1].id if files else "",
)
async def openai_retrieve_file(self, file_id: str) -> OpenAIFileObject:
"""Returns information about a specific file."""
if not self.sql_store:
raise RuntimeError("Files provider not initialized")
row = await self.sql_store.fetch_one("openai_files", where={"id": file_id})
if not row:
raise ValueError(f"File with id {file_id} not found")
return OpenAIFileObject(
id=row["id"],
filename=row["filename"],
purpose=OpenAIFilePurpose(row["purpose"]),
bytes=row["bytes"],
created_at=row["created_at"],
expires_at=row["expires_at"],
)
async def openai_delete_file(self, file_id: str) -> OpenAIFileDeleteResponse:
"""Delete a file."""
if not self.sql_store:
raise RuntimeError("Files provider not initialized")
row = await self.sql_store.fetch_one("openai_files", where={"id": file_id})
if not row:
raise ValueError(f"File with id {file_id} not found")
# Delete physical file
file_path = Path(row["file_path"])
if file_path.exists():
file_path.unlink()
# Delete metadata from database
await self.sql_store.delete("openai_files", where={"id": file_id})
return OpenAIFileDeleteResponse(
id=file_id,
deleted=True,
)
async def openai_retrieve_file_content(self, file_id: str) -> Response:
"""Returns the contents of the specified file."""
if not self.sql_store:
raise RuntimeError("Files provider not initialized")
# Get file metadata
row = await self.sql_store.fetch_one("openai_files", where={"id": file_id})
if not row:
raise ValueError(f"File with id {file_id} not found")
# Read file content
file_path = Path(row["file_path"])
if not file_path.exists():
raise ValueError(f"File content not found on disk: {file_path}")
with open(file_path, "rb") as f:
content = f.read()
# Return as binary response with appropriate content type
return Response(
content=content,
media_type="application/octet-stream",
headers={"Content-Disposition": f'attachment; filename="{row["filename"]}"'},
)
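
A hedged end-to-end sketch of driving the provider directly, bypassing the HTTP layer. The purpose member, the paths, and the sqlite config fields are assumptions for illustration, not confirmed by this diff.

import asyncio
import io

from fastapi import UploadFile

from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig

async def _files_demo() -> None:
    config = LocalfsFilesImplConfig(
        storage_dir="/tmp/llama-files",  # hypothetical location
        metadata_store=SqliteSqlStoreConfig(db_path="/tmp/llama-files/files_metadata.db"),
    )
    impl = LocalfsFilesImpl(config)
    await impl.initialize()
    upload = UploadFile(file=io.BytesIO(b"hello"), filename="notes.txt")
    created = await impl.openai_upload_file(file=upload, purpose=OpenAIFilePurpose.ASSISTANTS)  # assumed member
    fetched = await impl.openai_retrieve_file(created.id)
    assert fetched.bytes == 5

asyncio.run(_files_demo())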

View file

@ -30,7 +30,7 @@ class TelemetryConfig(BaseModel):
)
service_name: str = Field(
# service name is always the same, use zero-width space to avoid clutter
default="",
default="\u200b",
description="The service name to use for telemetry",
)
sinks: list[TelemetrySink] = Field(
@ -52,7 +52,7 @@ class TelemetryConfig(BaseModel):
@classmethod
def sample_run_config(cls, __distro_dir__: str, db_name: str = "trace_store.db") -> dict[str, Any]:
return {
"service_name": "${env.OTEL_SERVICE_NAME:}",
"service_name": "${env.OTEL_SERVICE_NAME:\u200b}",
"sinks": "${env.TELEMETRY_SINKS:console,sqlite}",
"sqlite_db_path": "${env.SQLITE_STORE_DIR:" + __distro_dir__ + "}/" + db_name,
}
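
A sketch of the ${env.VAR:default} substitution these values rely on; the resolver below is illustrative only, not the stack's actual implementation.

import os
import re

def _resolve(value: str) -> str:
    # Replace ${env.NAME:default} with the env var's value, or the default.
    return re.sub(
        r"\$\{env\.([A-Za-z0-9_]+):?([^}]*)\}",
        lambda m: os.environ.get(m.group(1), m.group(2)),
        value,
    )

os.environ.pop("OTEL_SERVICE_NAME", None)
assert _resolve("${env.OTEL_SERVICE_NAME:\u200b}") == "\u200b"  # unset -> zero-width space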

View file

@ -4,8 +4,22 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.providers.datatypes import ProviderSpec
from llama_stack.providers.datatypes import (
Api,
InlineProviderSpec,
ProviderSpec,
)
from llama_stack.providers.utils.sqlstore.sqlstore import sql_store_pip_packages
def available_providers() -> list[ProviderSpec]:
return []
return [
InlineProviderSpec(
api=Api.files,
provider_type="inline::localfs",
# TODO: make this dynamic according to the sql store type
pip_packages=sql_store_pip_packages,
module="llama_stack.providers.inline.files.localfs",
config_class="llama_stack.providers.inline.files.localfs.config.LocalfsFilesImplConfig",
),
]

View file

@ -15,7 +15,6 @@ from llama_stack.providers.datatypes import (
META_REFERENCE_DEPS = [
"accelerate",
"blobfile",
"fairscale",
"torch",
"torchvision",

View file

@ -20,7 +20,6 @@ def available_providers() -> list[ProviderSpec]:
api=Api.tool_runtime,
provider_type="inline::rag-runtime",
pip_packages=[
"blobfile",
"chardet",
"pypdf",
"tqdm",

View file

@ -255,7 +255,7 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProv
params = {
"model": request.model,
**input_dict,
"stream": request.stream,
"stream": bool(request.stream),
**self._build_options(request.sampling_params, request.response_format, request.logprobs),
}
logger.debug(f"params to fireworks: {params}")

View file

@ -12,7 +12,7 @@ from llama_stack.providers.utils.inference.model_registry import (
build_model_entry,
)
model_entries = [
MODEL_ENTRIES = [
build_hf_repo_model_entry(
"llama3.1:8b-instruct-fp16",
CoreModelId.llama3_1_8b_instruct.value,

View file

@ -5,6 +5,7 @@
# the root directory of this source tree.
import uuid
from collections.abc import AsyncGenerator, AsyncIterator
from typing import Any
@ -77,7 +78,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
request_has_media,
)
from .models import model_entries
from .models import MODEL_ENTRIES
logger = get_logger(name=__name__, category="inference")
@ -87,7 +88,7 @@ class OllamaInferenceAdapter(
ModelsProtocolPrivate,
):
def __init__(self, url: str) -> None:
self.register_helper = ModelRegistryHelper(model_entries)
self.register_helper = ModelRegistryHelper(MODEL_ENTRIES)
self.url = url
@property
@ -480,7 +481,25 @@ class OllamaInferenceAdapter(
top_p=top_p,
user=user,
)
return await self.openai_client.chat.completions.create(**params) # type: ignore
response = await self.openai_client.chat.completions.create(**params)
return await self._adjust_ollama_chat_completion_response_ids(response)
async def _adjust_ollama_chat_completion_response_ids(
self,
response: OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk],
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
id = f"chatcmpl-{uuid.uuid4()}"
if isinstance(response, AsyncIterator):
async def stream_with_chunk_ids() -> AsyncIterator[OpenAIChatCompletionChunk]:
async for chunk in response:
chunk.id = id
yield chunk
return stream_with_chunk_ids()
else:
response.id = id
return response
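
A toy sketch of the wrapping pattern above, with stand-in types: the adapter generates one chatcmpl-... id and stamps it onto the non-streaming response or onto every chunk of a streamed one, so all chunks of a completion share a single id.

import asyncio
import uuid
from collections.abc import AsyncIterator

class _Chunk:
    id: str = ""

async def _chunks() -> AsyncIterator[_Chunk]:
    for _ in range(3):
        yield _Chunk()

async def _stamp(stream: AsyncIterator[_Chunk]) -> AsyncIterator[_Chunk]:
    stamped = f"chatcmpl-{uuid.uuid4()}"
    async for chunk in stream:
        chunk.id = stamped
        yield chunk

async def _demo() -> None:
    ids = {c.id async for c in _stamp(_chunks())}
    assert len(ids) == 1  # all chunks share one id

asyncio.run(_demo())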
async def batch_completion(
self,

View file

@ -72,15 +72,15 @@ class PostgresKVStoreConfig(CommonConfig):
table_name: str = "llamastack_kvstore"
@classmethod
def sample_run_config(cls, table_name: str = "llamastack_kvstore"):
def sample_run_config(cls, table_name: str = "llamastack_kvstore", **kwargs):
return {
"type": "postgres",
"namespace": None,
"host": "${env.POSTGRES_HOST:localhost}",
"port": "${env.POSTGRES_PORT:5432}",
"db": "${env.POSTGRES_DB}",
"user": "${env.POSTGRES_USER}",
"password": "${env.POSTGRES_PASSWORD}",
"db": "${env.POSTGRES_DB:llamastack}",
"user": "${env.POSTGRES_USER:llamastack}",
"password": "${env.POSTGRES_PASSWORD:llamastack}",
"table_name": "${env.POSTGRES_TABLE_NAME:" + table_name + "}",
}

View file

@ -16,6 +16,8 @@ from llama_stack.distribution.utils.config_dirs import RUNTIME_BASE_DIR
from .api import SqlStore
sql_store_pip_packages = ["sqlalchemy[asyncio]", "aiosqlite", "asyncpg"]
class SqlStoreType(Enum):
sqlite = "sqlite"
@ -72,6 +74,17 @@ class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig):
def pip_packages(self) -> list[str]:
return super().pip_packages + ["asyncpg"]
@classmethod
def sample_run_config(cls, **kwargs):
return cls(
type="postgres",
host="${env.POSTGRES_HOST:localhost}",
port="${env.POSTGRES_PORT:5432}",
db="${env.POSTGRES_DB:llamastack}",
user="${env.POSTGRES_USER:llamastack}",
password="${env.POSTGRES_PASSWORD:llamastack}",
)
SqlStoreConfig = Annotated[
SqliteSqlStoreConfig | PostgresSqlStoreConfig,

View file

@ -42,7 +42,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/trace_store.db
eval:

View file

@ -82,7 +82,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/trace_store.db
tool_runtime:

View file

@ -45,7 +45,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/trace_store.db
eval:

View file

@ -48,7 +48,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/trace_store.db
eval:

View file

@ -44,7 +44,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/trace_store.db
eval:

View file

@ -24,6 +24,8 @@ distribution_spec:
- inline::basic
- inline::llm-as-judge
- inline::braintrust
files:
- inline::localfs
tool_runtime:
- remote::brave-search
- remote::tavily-search

View file

@ -13,6 +13,7 @@ from llama_stack.distribution.datatypes import (
ShieldInput,
ToolGroupInput,
)
from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig
from llama_stack.providers.inline.inference.sentence_transformers import (
SentenceTransformersInferenceConfig,
)
@ -36,6 +37,7 @@ def get_distribution_template() -> DistributionTemplate:
"eval": ["inline::meta-reference"],
"datasetio": ["remote::huggingface", "inline::localfs"],
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
"files": ["inline::localfs"],
"tool_runtime": [
"remote::brave-search",
"remote::tavily-search",
@ -62,6 +64,11 @@ def get_distribution_template() -> DistributionTemplate:
provider_type="inline::faiss",
config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
)
files_provider = Provider(
provider_id="meta-reference-files",
provider_type="inline::localfs",
config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"),
)
available_models = {
"fireworks": MODEL_ENTRIES,
@ -104,6 +111,7 @@ def get_distribution_template() -> DistributionTemplate:
provider_overrides={
"inference": [inference_provider, embedding_provider],
"vector_io": [vector_io_provider],
"files": [files_provider],
},
default_models=default_models + [embedding_model],
default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
@ -116,6 +124,7 @@ def get_distribution_template() -> DistributionTemplate:
embedding_provider,
],
"vector_io": [vector_io_provider],
"files": [files_provider],
"safety": [
Provider(
provider_id="llama-guard",

View file

@ -4,6 +4,7 @@ apis:
- agents
- datasetio
- eval
- files
- inference
- safety
- scoring
@ -53,7 +54,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/trace_store.db
eval:
@ -90,6 +91,14 @@ providers:
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:}
files:
- provider_id: meta-reference-files
provider_type: inline::localfs
config:
storage_dir: ${env.FILES_STORAGE_DIR:~/.llama/distributions/fireworks/files}
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/files_metadata.db
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search

View file

@ -4,6 +4,7 @@ apis:
- agents
- datasetio
- eval
- files
- inference
- safety
- scoring
@ -48,7 +49,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/trace_store.db
eval:
@ -85,6 +86,14 @@ providers:
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:}
files:
- provider_id: meta-reference-files
provider_type: inline::localfs
config:
storage_dir: ${env.FILES_STORAGE_DIR:~/.llama/distributions/fireworks/files}
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/files_metadata.db
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search

View file

@ -48,7 +48,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/trace_store.db
eval:
@ -112,7 +112,7 @@ models:
provider_model_id: groq/llama3-8b-8192
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-8B-Instruct
model_id: groq/meta-llama/Llama-3.1-8B-Instruct
provider_id: groq
provider_model_id: groq/llama3-8b-8192
model_type: llm
@ -127,7 +127,7 @@ models:
provider_model_id: groq/llama3-70b-8192
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3-70B-Instruct
model_id: groq/meta-llama/Llama-3-70B-Instruct
provider_id: groq
provider_model_id: groq/llama3-70b-8192
model_type: llm
@ -137,7 +137,7 @@ models:
provider_model_id: groq/llama-3.3-70b-versatile
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.3-70B-Instruct
model_id: groq/meta-llama/Llama-3.3-70B-Instruct
provider_id: groq
provider_model_id: groq/llama-3.3-70b-versatile
model_type: llm
@ -147,7 +147,7 @@ models:
provider_model_id: groq/llama-3.2-3b-preview
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-3B-Instruct
model_id: groq/meta-llama/Llama-3.2-3B-Instruct
provider_id: groq
provider_model_id: groq/llama-3.2-3b-preview
model_type: llm
@ -157,7 +157,7 @@ models:
provider_model_id: groq/llama-4-scout-17b-16e-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
model_id: groq/meta-llama/Llama-4-Scout-17B-16E-Instruct
provider_id: groq
provider_model_id: groq/llama-4-scout-17b-16e-instruct
model_type: llm
@ -167,7 +167,7 @@ models:
provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
model_id: groq/meta-llama/Llama-4-Scout-17B-16E-Instruct
provider_id: groq
provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
model_type: llm
@ -177,7 +177,7 @@ models:
provider_model_id: groq/llama-4-maverick-17b-128e-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
model_id: groq/meta-llama/Llama-4-Maverick-17B-128E-Instruct
provider_id: groq
provider_model_id: groq/llama-4-maverick-17b-128e-instruct
model_type: llm
@ -187,7 +187,7 @@ models:
provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
model_id: groq/meta-llama/Llama-4-Maverick-17B-128E-Instruct
provider_id: groq
provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
model_type: llm

View file

@ -53,7 +53,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/trace_store.db
eval:

View file

@ -48,7 +48,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/trace_store.db
eval:

View file

@ -53,7 +53,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/trace_store.db
eval:

View file

@ -48,7 +48,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/trace_store.db
eval:

View file

@ -57,7 +57,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/trace_store.db
eval:

View file

@ -63,7 +63,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/trace_store.db
eval:

View file

@ -53,7 +53,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/trace_store.db
eval:

View file

@ -53,7 +53,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/trace_store.db
eval:

View file

@ -48,7 +48,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/trace_store.db
eval:

View file

@ -47,7 +47,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/trace_store.db
eval:

View file

@ -45,7 +45,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/trace_store.db
eval:

View file

@ -71,7 +71,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/trace_store.db
eval:

View file

@ -53,7 +53,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/trace_store.db
eval:

View file

@ -48,7 +48,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/trace_store.db
eval:

View file

@ -3,8 +3,8 @@ distribution_spec:
description: Quick start template for running Llama Stack with several popular providers
providers:
inference:
- remote::fireworks
- remote::vllm
- inline::sentence-transformers
vector_io:
- remote::chromadb
safety:

View file

@ -5,64 +5,36 @@
# the root directory of this source tree.
from llama_stack.apis.models.models import ModelType
from llama_stack.distribution.datatypes import (
ModelInput,
Provider,
ShieldInput,
ToolGroupInput,
)
from llama_stack.providers.remote.inference.fireworks.config import FireworksImplConfig
from llama_stack.providers.remote.inference.fireworks.models import (
MODEL_ENTRIES as FIREWORKS_MODEL_ENTRIES,
)
from llama_stack.providers.inline.inference.sentence_transformers import SentenceTransformersInferenceConfig
from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig
from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig
from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
from llama_stack.providers.utils.kvstore.config import PostgresKVStoreConfig
from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig
from llama_stack.templates.template import (
DistributionTemplate,
RunConfigSettings,
get_model_registry,
)
def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderModelEntry]]]:
# in this template, we allow each API key to be optional
providers = [
(
"fireworks",
FIREWORKS_MODEL_ENTRIES,
FireworksImplConfig.sample_run_config(api_key="${env.FIREWORKS_API_KEY:}"),
),
]
inference_providers = []
available_models = {}
for provider_id, model_entries, config in providers:
inference_providers.append(
Provider(
provider_id=provider_id,
provider_type=f"remote::{provider_id}",
config=config,
)
)
available_models[provider_id] = model_entries
inference_providers.append(
def get_distribution_template() -> DistributionTemplate:
inference_providers = [
Provider(
provider_id="vllm-inference",
provider_type="remote::vllm",
config=VLLMInferenceAdapterConfig.sample_run_config(
url="${env.VLLM_URL:http://localhost:8000/v1}",
),
)
)
return inference_providers, available_models
def get_distribution_template() -> DistributionTemplate:
inference_providers, available_models = get_inference_providers()
),
]
providers = {
"inference": ([p.provider_type for p in inference_providers]),
"inference": ([p.provider_type for p in inference_providers] + ["inline::sentence-transformers"]),
"vector_io": ["remote::chromadb"],
"safety": ["inline::llama-guard"],
"agents": ["inline::meta-reference"],
@ -94,22 +66,26 @@ def get_distribution_template() -> DistributionTemplate:
),
]
default_models = get_model_registry(available_models)
default_models.append(
default_models = [
ModelInput(
model_id="${env.INFERENCE_MODEL}",
provider_id="vllm-inference",
)
]
embedding_provider = Provider(
provider_id="sentence-transformers",
provider_type="inline::sentence-transformers",
config=SentenceTransformersInferenceConfig.sample_run_config(),
)
postgres_config = {
"type": "postgres",
"host": "${env.POSTGRES_HOST:localhost}",
"port": "${env.POSTGRES_PORT:5432}",
"db": "${env.POSTGRES_DB:llamastack}",
"user": "${env.POSTGRES_USER:llamastack}",
"password": "${env.POSTGRES_PASSWORD:llamastack}",
}
embedding_model = ModelInput(
model_id="all-MiniLM-L6-v2",
provider_id=embedding_provider.provider_id,
model_type=ModelType.embedding,
metadata={
"embedding_dimension": 384,
},
)
postgres_config = PostgresSqlStoreConfig.sample_run_config()
return DistributionTemplate(
name=name,
distro_type="self_hosted",
@ -117,11 +93,11 @@ def get_distribution_template() -> DistributionTemplate:
container_image=None,
template_path=None,
providers=providers,
available_models_by_provider=available_models,
available_models_by_provider={},
run_configs={
"run.yaml": RunConfigSettings(
provider_overrides={
"inference": inference_providers,
"inference": inference_providers + [embedding_provider],
"vector_io": vector_io_providers,
"agents": [
Provider(
@ -139,16 +115,17 @@ def get_distribution_template() -> DistributionTemplate:
provider_type="inline::meta-reference",
config=dict(
service_name="${env.OTEL_SERVICE_NAME:}",
sinks="${env.TELEMETRY_SINKS:console}",
sinks="${env.TELEMETRY_SINKS:console,otel_trace}",
otel_trace_endpoint="${env.OTEL_TRACE_ENDPOINT:http://localhost:4318/v1/traces}",
),
)
],
},
default_models=default_models,
default_models=default_models + [embedding_model],
default_tool_groups=default_tool_groups,
default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
metadata_store=PostgresKVStoreConfig.model_validate(postgres_config),
inference_store=PostgresSqlStoreConfig.model_validate(postgres_config),
metadata_store=PostgresKVStoreConfig.sample_run_config(),
inference_store=postgres_config,
),
},
run_config_env_vars={
@ -156,9 +133,5 @@ def get_distribution_template() -> DistributionTemplate:
"8321",
"Port for the Llama Stack distribution server",
),
"FIREWORKS_API_KEY": (
"",
"Fireworks API Key",
),
},
)

View file
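
Because the unified view above interleaves removed and added lines, here is the new code path assembled in one place; a sketch that mirrors the diff, not the shipped template module itself:

from llama_stack.apis.models.models import ModelType
from llama_stack.distribution.datatypes import ModelInput, Provider
from llama_stack.providers.inline.inference.sentence_transformers import (
    SentenceTransformersInferenceConfig,
)
from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig

# The template now hardcodes a single remote vLLM provider plus an inline
# embedding provider, instead of enumerating Fireworks model entries.
inference_providers = [
    Provider(
        provider_id="vllm-inference",
        provider_type="remote::vllm",
        config=VLLMInferenceAdapterConfig.sample_run_config(
            url="${env.VLLM_URL:http://localhost:8000/v1}",
        ),
    ),
]
embedding_provider = Provider(
    provider_id="sentence-transformers",
    provider_type="inline::sentence-transformers",
    config=SentenceTransformersInferenceConfig.sample_run_config(),
)
# Models are registered explicitly: the serving model comes from
# ${env.INFERENCE_MODEL}, plus one sentence-transformers embedding model.
default_models = [
    ModelInput(model_id="${env.INFERENCE_MODEL}", provider_id="vllm-inference"),
]
embedding_model = ModelInput(
    model_id="all-MiniLM-L6-v2",
    provider_id=embedding_provider.provider_id,
    model_type=ModelType.embedding,
    metadata={"embedding_dimension": 384},
)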

@ -9,11 +9,6 @@ apis:
- vector_io
providers:
inference:
- provider_id: fireworks
provider_type: remote::fireworks
config:
url: https://api.fireworks.ai/inference/v1
api_key: ${env.FIREWORKS_API_KEY:}
- provider_id: vllm-inference
provider_type: remote::vllm
config:
@ -21,6 +16,9 @@ providers:
max_tokens: ${env.VLLM_MAX_TOKENS:4096}
api_token: ${env.VLLM_API_TOKEN:fake}
tls_verify: ${env.VLLM_TLS_VERIFY:true}
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
config: {}
vector_io:
- provider_id: ${env.ENABLE_CHROMADB+chromadb}
provider_type: remote::chromadb
@ -54,7 +52,8 @@ providers:
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
sinks: ${env.TELEMETRY_SINKS:console}
sinks: ${env.TELEMETRY_SINKS:console,otel_trace}
otel_trace_endpoint: ${env.OTEL_TRACE_ENDPOINT:http://localhost:4318/v1/traces}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
@ -79,7 +78,7 @@ metadata_store:
db: ${env.POSTGRES_DB:llamastack}
user: ${env.POSTGRES_USER:llamastack}
password: ${env.POSTGRES_PASSWORD:llamastack}
table_name: llamastack_kvstore
table_name: ${env.POSTGRES_TABLE_NAME:llamastack_kvstore}
inference_store:
type: postgres
host: ${env.POSTGRES_HOST:localhost}
@ -88,127 +87,15 @@ inference_store:
user: ${env.POSTGRES_USER:llamastack}
password: ${env.POSTGRES_PASSWORD:llamastack}
models:
- metadata: {}
model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-8B-Instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
model_type: llm
- metadata: {}
model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-70B-Instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
model_type: llm
- metadata: {}
model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
model_type: llm
- metadata: {}
model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-3B-Instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
model_type: llm
- metadata: {}
model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
model_type: llm
- metadata: {}
model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
model_type: llm
- metadata: {}
model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.3-70B-Instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
model_type: llm
- metadata: {}
model_id: accounts/fireworks/models/llama-guard-3-8b
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-guard-3-8b
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-Guard-3-8B
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-guard-3-8b
model_type: llm
- metadata: {}
model_id: accounts/fireworks/models/llama-guard-3-11b-vision
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-Guard-3-11B-Vision
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
model_type: llm
- metadata: {}
model_id: accounts/fireworks/models/llama4-scout-instruct-basic
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic
model_type: llm
- metadata: {}
model_id: accounts/fireworks/models/llama4-maverick-instruct-basic
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic
model_type: llm
- metadata:
embedding_dimension: 768
context_length: 8192
model_id: nomic-ai/nomic-embed-text-v1.5
provider_id: fireworks
provider_model_id: nomic-ai/nomic-embed-text-v1.5
model_type: embedding
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: vllm-inference
model_type: llm
- metadata:
embedding_dimension: 384
model_id: all-MiniLM-L6-v2
provider_id: sentence-transformers
model_type: embedding
shields:
- shield_id: meta-llama/Llama-Guard-3-8B
vector_dbs: []

View file
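
The run.yaml above now defaults the telemetry sinks to console,otel_trace and adds an OTLP trace endpoint. A small, hedged sketch of overriding those knobs from Python before launching the stack (an OTLP collector listening on :4318 is assumed to be running separately):

import os

# Values mirror the defaults in the run.yaml above; OTEL_SERVICE_NAME is an
# assumed example name, not something this diff prescribes.
os.environ.setdefault("OTEL_SERVICE_NAME", "llama-stack-demo")
os.environ.setdefault("TELEMETRY_SINKS", "console,otel_trace")
os.environ.setdefault("OTEL_TRACE_ENDPOINT", "http://localhost:4318/v1/traces")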

@ -91,7 +91,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/trace_store.db
tool_runtime:

View file

@ -84,7 +84,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/trace_store.db
tool_runtime:

View file

@ -58,7 +58,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/trace_store.db
tool_runtime:

View file

@ -5,10 +5,13 @@ distribution_spec:
inference:
- remote::openai
- remote::fireworks
- remote::together
- remote::ollama
- remote::anthropic
- remote::gemini
- remote::groq
- remote::sambanova
- remote::vllm
- inline::sentence-transformers
vector_io:
- inline::sqlite-vec
@ -37,4 +40,5 @@ distribution_spec:
image_type: conda
additional_pip_packages:
- aiosqlite
- asyncpg
- sqlalchemy[asyncio]

View file

@ -21,6 +21,15 @@ providers:
config:
url: https://api.fireworks.ai/inference/v1
api_key: ${env.FIREWORKS_API_KEY:}
- provider_id: together
provider_type: remote::together
config:
url: https://api.together.xyz/v1
api_key: ${env.TOGETHER_API_KEY:}
- provider_id: ollama
provider_type: remote::ollama
config:
url: ${env.OLLAMA_URL:http://localhost:11434}
- provider_id: anthropic
provider_type: remote::anthropic
config:
@ -39,6 +48,13 @@ providers:
config:
url: https://api.sambanova.ai/v1
api_key: ${env.SAMBANOVA_API_KEY:}
- provider_id: vllm
provider_type: remote::vllm
config:
url: ${env.VLLM_URL:http://localhost:8000/v1}
max_tokens: ${env.VLLM_MAX_TOKENS:4096}
api_token: ${env.VLLM_API_TOKEN:fake}
tls_verify: ${env.VLLM_TLS_VERIFY:true}
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
config: {}
@ -79,7 +95,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/trace_store.db
eval:
@ -156,72 +172,72 @@ models:
provider_model_id: openai/chatgpt-4o-latest
model_type: llm
- metadata: {}
model_id: gpt-3.5-turbo-0125
model_id: openai/gpt-3.5-turbo-0125
provider_id: openai
provider_model_id: gpt-3.5-turbo-0125
model_type: llm
- metadata: {}
model_id: gpt-3.5-turbo
model_id: openai/gpt-3.5-turbo
provider_id: openai
provider_model_id: gpt-3.5-turbo
model_type: llm
- metadata: {}
model_id: gpt-3.5-turbo-instruct
model_id: openai/gpt-3.5-turbo-instruct
provider_id: openai
provider_model_id: gpt-3.5-turbo-instruct
model_type: llm
- metadata: {}
model_id: gpt-4
model_id: openai/gpt-4
provider_id: openai
provider_model_id: gpt-4
model_type: llm
- metadata: {}
model_id: gpt-4-turbo
model_id: openai/gpt-4-turbo
provider_id: openai
provider_model_id: gpt-4-turbo
model_type: llm
- metadata: {}
model_id: gpt-4o
model_id: openai/gpt-4o
provider_id: openai
provider_model_id: gpt-4o
model_type: llm
- metadata: {}
model_id: gpt-4o-2024-08-06
model_id: openai/gpt-4o-2024-08-06
provider_id: openai
provider_model_id: gpt-4o-2024-08-06
model_type: llm
- metadata: {}
model_id: gpt-4o-mini
model_id: openai/gpt-4o-mini
provider_id: openai
provider_model_id: gpt-4o-mini
model_type: llm
- metadata: {}
model_id: gpt-4o-audio-preview
model_id: openai/gpt-4o-audio-preview
provider_id: openai
provider_model_id: gpt-4o-audio-preview
model_type: llm
- metadata: {}
model_id: chatgpt-4o-latest
model_id: openai/chatgpt-4o-latest
provider_id: openai
provider_model_id: chatgpt-4o-latest
model_type: llm
- metadata: {}
model_id: o1
model_id: openai/o1
provider_id: openai
provider_model_id: o1
model_type: llm
- metadata: {}
model_id: o1-mini
model_id: openai/o1-mini
provider_id: openai
provider_model_id: o1-mini
model_type: llm
- metadata: {}
model_id: o3-mini
model_id: openai/o3-mini
provider_id: openai
provider_model_id: o3-mini
model_type: llm
- metadata: {}
model_id: o4-mini
model_id: openai/o4-mini
provider_id: openai
provider_model_id: o4-mini
model_type: llm
@ -242,14 +258,14 @@ models:
- metadata:
embedding_dimension: 1536
context_length: 8192
model_id: text-embedding-3-small
model_id: openai/text-embedding-3-small
provider_id: openai
provider_model_id: text-embedding-3-small
model_type: embedding
- metadata:
embedding_dimension: 3072
context_length: 8192
model_id: text-embedding-3-large
model_id: openai/text-embedding-3-large
provider_id: openai
provider_model_id: text-embedding-3-large
model_type: embedding
@ -259,7 +275,7 @@ models:
provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-8B-Instruct
model_id: fireworks/meta-llama/Llama-3.1-8B-Instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
model_type: llm
@ -269,7 +285,7 @@ models:
provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-70B-Instruct
model_id: fireworks/meta-llama/Llama-3.1-70B-Instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
model_type: llm
@ -279,7 +295,7 @@ models:
provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
model_id: fireworks/meta-llama/Llama-3.1-405B-Instruct-FP8
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
model_type: llm
@ -289,7 +305,7 @@ models:
provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-3B-Instruct
model_id: fireworks/meta-llama/Llama-3.2-3B-Instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
model_type: llm
@ -299,7 +315,7 @@ models:
provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
model_id: fireworks/meta-llama/Llama-3.2-11B-Vision-Instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
model_type: llm
@ -309,7 +325,7 @@ models:
provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
model_id: fireworks/meta-llama/Llama-3.2-90B-Vision-Instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
model_type: llm
@ -319,7 +335,7 @@ models:
provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.3-70B-Instruct
model_id: fireworks/meta-llama/Llama-3.3-70B-Instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
model_type: llm
@ -329,7 +345,7 @@ models:
provider_model_id: accounts/fireworks/models/llama-guard-3-8b
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-Guard-3-8B
model_id: fireworks/meta-llama/Llama-Guard-3-8B
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-guard-3-8b
model_type: llm
@ -339,7 +355,7 @@ models:
provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-Guard-3-11B-Vision
model_id: fireworks/meta-llama/Llama-Guard-3-11B-Vision
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
model_type: llm
@ -349,7 +365,7 @@ models:
provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
model_id: fireworks/meta-llama/Llama-4-Scout-17B-16E-Instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic
model_type: llm
@ -359,17 +375,307 @@ models:
provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
model_id: fireworks/meta-llama/Llama-4-Maverick-17B-128E-Instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic
model_type: llm
- metadata:
embedding_dimension: 768
context_length: 8192
model_id: nomic-ai/nomic-embed-text-v1.5
model_id: fireworks/nomic-ai/nomic-embed-text-v1.5
provider_id: fireworks
provider_model_id: nomic-ai/nomic-embed-text-v1.5
model_type: embedding
- metadata: {}
model_id: together/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
provider_id: together
provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: together/meta-llama/Llama-3.1-8B-Instruct
provider_id: together
provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: together/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
provider_id: together
provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: together/meta-llama/Llama-3.1-70B-Instruct
provider_id: together
provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: together/meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
provider_id: together
provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: together/meta-llama/Llama-3.1-405B-Instruct-FP8
provider_id: together
provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: together/meta-llama/Llama-3.2-3B-Instruct-Turbo
provider_id: together
provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: together/meta-llama/Llama-3.2-3B-Instruct
provider_id: together
provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: together/meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
provider_id: together
provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: together/meta-llama/Llama-3.2-11B-Vision-Instruct
provider_id: together
provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: together/meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
provider_id: together
provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: together/meta-llama/Llama-3.2-90B-Vision-Instruct
provider_id: together
provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: together/meta-llama/Llama-3.3-70B-Instruct-Turbo
provider_id: together
provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: together/meta-llama/Llama-3.3-70B-Instruct
provider_id: together
provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: together/meta-llama/Meta-Llama-Guard-3-8B
provider_id: together
provider_model_id: meta-llama/Meta-Llama-Guard-3-8B
model_type: llm
- metadata: {}
model_id: together/meta-llama/Llama-Guard-3-8B
provider_id: together
provider_model_id: meta-llama/Meta-Llama-Guard-3-8B
model_type: llm
- metadata: {}
model_id: together/meta-llama/Llama-Guard-3-11B-Vision-Turbo
provider_id: together
provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
model_type: llm
- metadata: {}
model_id: together/meta-llama/Llama-Guard-3-11B-Vision
provider_id: together
provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
model_type: llm
- metadata:
embedding_dimension: 768
context_length: 8192
model_id: togethercomputer/m2-bert-80M-8k-retrieval
provider_id: together
provider_model_id: togethercomputer/m2-bert-80M-8k-retrieval
model_type: embedding
- metadata:
embedding_dimension: 768
context_length: 32768
model_id: togethercomputer/m2-bert-80M-32k-retrieval
provider_id: together
provider_model_id: togethercomputer/m2-bert-80M-32k-retrieval
model_type: embedding
- metadata: {}
model_id: together/meta-llama/Llama-4-Scout-17B-16E-Instruct
provider_id: together
provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
model_type: llm
- metadata: {}
model_id: together/meta-llama/Llama-4-Scout-17B-16E-Instruct
provider_id: together
provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
model_type: llm
- metadata: {}
model_id: together/meta-llama/Llama-4-Scout-17B-16E-Instruct
provider_id: together
provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
model_type: llm
- metadata: {}
model_id: together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
provider_id: together
provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
model_type: llm
- metadata: {}
model_id: together/meta-llama/Llama-4-Maverick-17B-128E-Instruct
provider_id: together
provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
model_type: llm
- metadata: {}
model_id: together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
provider_id: together
provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
model_type: llm
- metadata: {}
model_id: ollama/llama3.1:8b-instruct-fp16
provider_id: ollama
provider_model_id: llama3.1:8b-instruct-fp16
model_type: llm
- metadata: {}
model_id: ollama/meta-llama/Llama-3.1-8B-Instruct
provider_id: ollama
provider_model_id: llama3.1:8b-instruct-fp16
model_type: llm
- metadata: {}
model_id: ollama/llama3.1:8b
provider_id: ollama
provider_model_id: llama3.1:8b
model_type: llm
- metadata: {}
model_id: ollama/llama3.1:70b-instruct-fp16
provider_id: ollama
provider_model_id: llama3.1:70b-instruct-fp16
model_type: llm
- metadata: {}
model_id: ollama/meta-llama/Llama-3.1-70B-Instruct
provider_id: ollama
provider_model_id: llama3.1:70b-instruct-fp16
model_type: llm
- metadata: {}
model_id: ollama/llama3.1:70b
provider_id: ollama
provider_model_id: llama3.1:70b
model_type: llm
- metadata: {}
model_id: ollama/llama3.1:405b-instruct-fp16
provider_id: ollama
provider_model_id: llama3.1:405b-instruct-fp16
model_type: llm
- metadata: {}
model_id: ollama/meta-llama/Llama-3.1-405B-Instruct-FP8
provider_id: ollama
provider_model_id: llama3.1:405b-instruct-fp16
model_type: llm
- metadata: {}
model_id: ollama/llama3.1:405b
provider_id: ollama
provider_model_id: llama3.1:405b
model_type: llm
- metadata: {}
model_id: ollama/llama3.2:1b-instruct-fp16
provider_id: ollama
provider_model_id: llama3.2:1b-instruct-fp16
model_type: llm
- metadata: {}
model_id: ollama/meta-llama/Llama-3.2-1B-Instruct
provider_id: ollama
provider_model_id: llama3.2:1b-instruct-fp16
model_type: llm
- metadata: {}
model_id: ollama/llama3.2:1b
provider_id: ollama
provider_model_id: llama3.2:1b
model_type: llm
- metadata: {}
model_id: ollama/llama3.2:3b-instruct-fp16
provider_id: ollama
provider_model_id: llama3.2:3b-instruct-fp16
model_type: llm
- metadata: {}
model_id: ollama/meta-llama/Llama-3.2-3B-Instruct
provider_id: ollama
provider_model_id: llama3.2:3b-instruct-fp16
model_type: llm
- metadata: {}
model_id: ollama/llama3.2:3b
provider_id: ollama
provider_model_id: llama3.2:3b
model_type: llm
- metadata: {}
model_id: ollama/llama3.2-vision:11b-instruct-fp16
provider_id: ollama
provider_model_id: llama3.2-vision:11b-instruct-fp16
model_type: llm
- metadata: {}
model_id: ollama/meta-llama/Llama-3.2-11B-Vision-Instruct
provider_id: ollama
provider_model_id: llama3.2-vision:11b-instruct-fp16
model_type: llm
- metadata: {}
model_id: ollama/llama3.2-vision:latest
provider_id: ollama
provider_model_id: llama3.2-vision:latest
model_type: llm
- metadata: {}
model_id: ollama/llama3.2-vision:90b-instruct-fp16
provider_id: ollama
provider_model_id: llama3.2-vision:90b-instruct-fp16
model_type: llm
- metadata: {}
model_id: ollama/meta-llama/Llama-3.2-90B-Vision-Instruct
provider_id: ollama
provider_model_id: llama3.2-vision:90b-instruct-fp16
model_type: llm
- metadata: {}
model_id: ollama/llama3.2-vision:90b
provider_id: ollama
provider_model_id: llama3.2-vision:90b
model_type: llm
- metadata: {}
model_id: ollama/llama3.3:70b
provider_id: ollama
provider_model_id: llama3.3:70b
model_type: llm
- metadata: {}
model_id: ollama/meta-llama/Llama-3.3-70B-Instruct
provider_id: ollama
provider_model_id: llama3.3:70b
model_type: llm
- metadata: {}
model_id: ollama/llama-guard3:8b
provider_id: ollama
provider_model_id: llama-guard3:8b
model_type: llm
- metadata: {}
model_id: ollama/meta-llama/Llama-Guard-3-8B
provider_id: ollama
provider_model_id: llama-guard3:8b
model_type: llm
- metadata: {}
model_id: ollama/llama-guard3:1b
provider_id: ollama
provider_model_id: llama-guard3:1b
model_type: llm
- metadata: {}
model_id: ollama/meta-llama/Llama-Guard-3-1B
provider_id: ollama
provider_model_id: llama-guard3:1b
model_type: llm
- metadata:
embedding_dimension: 384
context_length: 512
model_id: ollama/all-minilm:latest
provider_id: ollama
provider_model_id: all-minilm:latest
model_type: embedding
- metadata:
embedding_dimension: 384
context_length: 512
model_id: ollama/all-minilm
provider_id: ollama
provider_model_id: all-minilm:latest
model_type: embedding
- metadata:
embedding_dimension: 768
context_length: 8192
model_id: ollama/nomic-embed-text
provider_id: ollama
provider_model_id: nomic-embed-text
model_type: embedding
- metadata: {}
model_id: anthropic/claude-3-5-sonnet-latest
provider_id: anthropic
@ -429,7 +735,7 @@ models:
provider_model_id: groq/llama3-8b-8192
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-8B-Instruct
model_id: groq/meta-llama/Llama-3.1-8B-Instruct
provider_id: groq
provider_model_id: groq/llama3-8b-8192
model_type: llm
@ -444,7 +750,7 @@ models:
provider_model_id: groq/llama3-70b-8192
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3-70B-Instruct
model_id: groq/meta-llama/Llama-3-70B-Instruct
provider_id: groq
provider_model_id: groq/llama3-70b-8192
model_type: llm
@ -454,7 +760,7 @@ models:
provider_model_id: groq/llama-3.3-70b-versatile
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.3-70B-Instruct
model_id: groq/meta-llama/Llama-3.3-70B-Instruct
provider_id: groq
provider_model_id: groq/llama-3.3-70b-versatile
model_type: llm
@ -464,7 +770,7 @@ models:
provider_model_id: groq/llama-3.2-3b-preview
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-3B-Instruct
model_id: groq/meta-llama/Llama-3.2-3B-Instruct
provider_id: groq
provider_model_id: groq/llama-3.2-3b-preview
model_type: llm
@ -474,7 +780,7 @@ models:
provider_model_id: groq/llama-4-scout-17b-16e-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
model_id: groq/meta-llama/Llama-4-Scout-17B-16E-Instruct
provider_id: groq
provider_model_id: groq/llama-4-scout-17b-16e-instruct
model_type: llm
@ -484,7 +790,7 @@ models:
provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
model_id: groq/meta-llama/Llama-4-Scout-17B-16E-Instruct
provider_id: groq
provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
model_type: llm
@ -494,7 +800,7 @@ models:
provider_model_id: groq/llama-4-maverick-17b-128e-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
model_id: groq/meta-llama/Llama-4-Maverick-17B-128E-Instruct
provider_id: groq
provider_model_id: groq/llama-4-maverick-17b-128e-instruct
model_type: llm
@ -504,7 +810,7 @@ models:
provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
model_id: groq/meta-llama/Llama-4-Maverick-17B-128E-Instruct
provider_id: groq
provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
model_type: llm
@ -514,7 +820,7 @@ models:
provider_model_id: sambanova/Meta-Llama-3.1-8B-Instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-8B-Instruct
model_id: sambanova/meta-llama/Llama-3.1-8B-Instruct
provider_id: sambanova
provider_model_id: sambanova/Meta-Llama-3.1-8B-Instruct
model_type: llm
@ -524,7 +830,7 @@ models:
provider_model_id: sambanova/Meta-Llama-3.1-405B-Instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
model_id: sambanova/meta-llama/Llama-3.1-405B-Instruct-FP8
provider_id: sambanova
provider_model_id: sambanova/Meta-Llama-3.1-405B-Instruct
model_type: llm
@ -534,7 +840,7 @@ models:
provider_model_id: sambanova/Meta-Llama-3.2-1B-Instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-1B-Instruct
model_id: sambanova/meta-llama/Llama-3.2-1B-Instruct
provider_id: sambanova
provider_model_id: sambanova/Meta-Llama-3.2-1B-Instruct
model_type: llm
@ -544,7 +850,7 @@ models:
provider_model_id: sambanova/Meta-Llama-3.2-3B-Instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-3B-Instruct
model_id: sambanova/meta-llama/Llama-3.2-3B-Instruct
provider_id: sambanova
provider_model_id: sambanova/Meta-Llama-3.2-3B-Instruct
model_type: llm
@ -554,7 +860,7 @@ models:
provider_model_id: sambanova/Meta-Llama-3.3-70B-Instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.3-70B-Instruct
model_id: sambanova/meta-llama/Llama-3.3-70B-Instruct
provider_id: sambanova
provider_model_id: sambanova/Meta-Llama-3.3-70B-Instruct
model_type: llm
@ -564,7 +870,7 @@ models:
provider_model_id: sambanova/Llama-3.2-11B-Vision-Instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
model_id: sambanova/meta-llama/Llama-3.2-11B-Vision-Instruct
provider_id: sambanova
provider_model_id: sambanova/Llama-3.2-11B-Vision-Instruct
model_type: llm
@ -574,7 +880,7 @@ models:
provider_model_id: sambanova/Llama-3.2-90B-Vision-Instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
model_id: sambanova/meta-llama/Llama-3.2-90B-Vision-Instruct
provider_id: sambanova
provider_model_id: sambanova/Llama-3.2-90B-Vision-Instruct
model_type: llm
@ -584,7 +890,7 @@ models:
provider_model_id: sambanova/Llama-4-Scout-17B-16E-Instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
model_id: sambanova/meta-llama/Llama-4-Scout-17B-16E-Instruct
provider_id: sambanova
provider_model_id: sambanova/Llama-4-Scout-17B-16E-Instruct
model_type: llm
@ -594,7 +900,7 @@ models:
provider_model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
model_id: sambanova/meta-llama/Llama-4-Maverick-17B-128E-Instruct
provider_id: sambanova
provider_model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct
model_type: llm
@ -604,7 +910,7 @@ models:
provider_model_id: sambanova/Meta-Llama-Guard-3-8B
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-Guard-3-8B
model_id: sambanova/meta-llama/Llama-Guard-3-8B
provider_id: sambanova
provider_model_id: sambanova/Meta-Llama-Guard-3-8B
model_type: llm

View file
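
With the conflict-prefixing in place, the starter distribution now exposes most model ids as <provider>/<model>. A hedged usage sketch, assuming the llama-stack-client SDK (installed separately) and the default server port (8321) listed in this template's env vars:

from llama_stack_client import LlamaStackClient  # assumed SDK

client = LlamaStackClient(base_url="http://localhost:8321")
response = client.inference.chat_completion(
    # Was "meta-llama/Llama-3.1-8B-Instruct"; now disambiguated per provider.
    model_id="fireworks/meta-llama/Llama-3.1-8B-Instruct",
    messages=[{"role": "user", "content": "Hello!"}],
)
print(response.completion_message.content)  # field names per the SDK at time of writing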

@ -34,6 +34,10 @@ from llama_stack.providers.remote.inference.groq.config import GroqConfig
from llama_stack.providers.remote.inference.groq.models import (
MODEL_ENTRIES as GROQ_MODEL_ENTRIES,
)
from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig
from llama_stack.providers.remote.inference.ollama.models import (
MODEL_ENTRIES as OLLAMA_MODEL_ENTRIES,
)
from llama_stack.providers.remote.inference.openai.config import OpenAIConfig
from llama_stack.providers.remote.inference.openai.models import (
MODEL_ENTRIES as OPENAI_MODEL_ENTRIES,
@ -42,11 +46,17 @@ from llama_stack.providers.remote.inference.sambanova.config import SambaNovaImp
from llama_stack.providers.remote.inference.sambanova.models import (
MODEL_ENTRIES as SAMBANOVA_MODEL_ENTRIES,
)
from llama_stack.providers.remote.inference.together.config import TogetherImplConfig
from llama_stack.providers.remote.inference.together.models import (
MODEL_ENTRIES as TOGETHER_MODEL_ENTRIES,
)
from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig
from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig
from llama_stack.providers.remote.vector_io.pgvector.config import (
PGVectorVectorIOConfig,
)
from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig
from llama_stack.templates.template import (
DistributionTemplate,
RunConfigSettings,
@ -67,6 +77,16 @@ def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderMo
FIREWORKS_MODEL_ENTRIES,
FireworksImplConfig.sample_run_config(api_key="${env.FIREWORKS_API_KEY:}"),
),
(
"together",
TOGETHER_MODEL_ENTRIES,
TogetherImplConfig.sample_run_config(api_key="${env.TOGETHER_API_KEY:}"),
),
(
"ollama",
OLLAMA_MODEL_ENTRIES,
OllamaImplConfig.sample_run_config(),
),
(
"anthropic",
ANTHROPIC_MODEL_ENTRIES,
@ -87,6 +107,13 @@ def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderMo
SAMBANOVA_MODEL_ENTRIES,
SambaNovaImplConfig.sample_run_config(api_key="${env.SAMBANOVA_API_KEY:}"),
),
(
"vllm",
[],
VLLMInferenceAdapterConfig.sample_run_config(
url="${env.VLLM_URL:http://localhost:8000/v1}",
),
),
]
inference_providers = []
available_models = {}
@ -169,6 +196,8 @@ def get_distribution_template() -> DistributionTemplate:
)
default_models = get_model_registry(available_models)
postgres_store = PostgresSqlStoreConfig.sample_run_config()
return DistributionTemplate(
name=name,
distro_type="self_hosted",
@ -177,6 +206,7 @@ def get_distribution_template() -> DistributionTemplate:
template_path=None,
providers=providers,
available_models_by_provider=available_models,
additional_pip_packages=postgres_store.pip_packages,
run_configs={
"run.yaml": RunConfigSettings(
provider_overrides={
@ -201,5 +231,25 @@ def get_distribution_template() -> DistributionTemplate:
"",
"OpenAI API Key",
),
"GROQ_API_KEY": (
"",
"Groq API Key",
),
"ANTHROPIC_API_KEY": (
"",
"Anthropic API Key",
),
"GEMINI_API_KEY": (
"",
"Gemini API Key",
),
"SAMBANOVA_API_KEY": (
"",
"SambaNova API Key",
),
"VLLM_URL": (
"http://localhost:8000/v1",
"VLLM URL",
),
},
)

View file

@ -8,6 +8,7 @@ from pathlib import Path
from typing import Literal
import jinja2
import rich
import yaml
from pydantic import BaseModel, Field
@ -36,13 +37,35 @@ def get_model_registry(
available_models: dict[str, list[ProviderModelEntry]],
) -> list[ModelInput]:
models = []
# check for conflicts in model ids
all_ids = set()
ids_conflict = False
for _, entries in available_models.items():
for entry in entries:
ids = [entry.provider_model_id] + entry.aliases
for model_id in ids:
if model_id in all_ids:
ids_conflict = True
rich.print(
f"[yellow]Model id {model_id} conflicts; all model ids will be prefixed with provider id[/yellow]"
)
break
all_ids.update(ids)
if ids_conflict:
break
if ids_conflict:
break
for provider_id, entries in available_models.items():
for entry in entries:
ids = [entry.provider_model_id] + entry.aliases
for model_id in ids:
identifier = f"{provider_id}/{model_id}" if ids_conflict and provider_id not in model_id else model_id
models.append(
ModelInput(
model_id=model_id,
model_id=identifier,
provider_model_id=entry.provider_model_id,
provider_id=provider_id,
model_type=entry.model_type,
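
The check above prefixes every model id with its provider once any id collides across providers. Note that "provider_id not in model_id" is a plain substring test, which is why accounts/fireworks/... and togethercomputer/... ids stay unprefixed in the starter run.yaml earlier, and presumably why distinct aliases that collapse to the same prefixed identifier yield the repeated together/meta-llama/Llama-4-Scout entries seen there. A self-contained toy reproduction of the rule, with a stand-in Entry type instead of ProviderModelEntry:

from dataclasses import dataclass, field

@dataclass
class Entry:  # stand-in for ProviderModelEntry, for illustration only
    provider_model_id: str
    aliases: list[str] = field(default_factory=list)

available = {
    "fireworks": [Entry("accounts/fireworks/models/llama-v3p1-8b-instruct",
                        ["meta-llama/Llama-3.1-8B-Instruct"])],
    "ollama": [Entry("llama3.1:8b-instruct-fp16",
                     ["meta-llama/Llama-3.1-8B-Instruct"])],  # alias collides
    "together": [Entry("togethercomputer/m2-bert-80M-8k-retrieval")],
}

# Pass 1: flag a conflict if any id is claimed more than once.
seen: set[str] = set()
ids_conflict = False
for entries in available.values():
    for entry in entries:
        for model_id in [entry.provider_model_id, *entry.aliases]:
            if model_id in seen:
                ids_conflict = True
            seen.add(model_id)

# Pass 2: prefix ids with their provider unless it already appears as a substring.
for provider_id, entries in available.items():
    for entry in entries:
        for model_id in [entry.provider_model_id, *entry.aliases]:
            if ids_conflict and provider_id not in model_id:
                print(f"{provider_id}/{model_id}")
            else:
                print(model_id)

# Prints:
#   accounts/fireworks/models/llama-v3p1-8b-instruct    (contains "fireworks" -> unprefixed)
#   fireworks/meta-llama/Llama-3.1-8B-Instruct
#   ollama/llama3.1:8b-instruct-fp16
#   ollama/meta-llama/Llama-3.1-8B-Instruct
#   togethercomputer/m2-bert-80M-8k-retrieval            (contains "together" -> unprefixed)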
@ -154,6 +177,11 @@ class DistributionTemplate(BaseModel):
available_models_by_provider: dict[str, list[ProviderModelEntry]] | None = None
# we may want to specify additional pip packages without necessarily indicating a
# specific "default" inference store (which is what typically used to dictate additional
# pip packages)
additional_pip_packages: list[str] | None = None
def build_config(self) -> BuildConfig:
additional_pip_packages: list[str] = []
for run_config in self.run_configs.values():
@ -161,6 +189,9 @@ class DistributionTemplate(BaseModel):
if run_config_.inference_store:
additional_pip_packages.extend(run_config_.inference_store.pip_packages)
if self.additional_pip_packages:
additional_pip_packages.extend(self.additional_pip_packages)
return BuildConfig(
distribution_spec=DistributionSpec(
description=self.description,

View file
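
The additional_pip_packages field introduced above lets a template contribute build-time packages beyond those implied by its run configs' inference stores; build_config() simply concatenates both lists. A toy rendering of that merge, with package values assumed from the starter build.yaml diff earlier:

# Assumed inputs: asyncpg/sqlalchemy[asyncio] via the Postgres inference
# store's pip_packages, aiosqlite via the template-level additional_pip_packages.
inference_store_packages = ["asyncpg", "sqlalchemy[asyncio]"]
template_extras = ["aiosqlite"]

additional_pip_packages: list[str] = []
additional_pip_packages.extend(inference_store_packages)
additional_pip_packages.extend(template_extras)
print(additional_pip_packages)  # ['asyncpg', 'sqlalchemy[asyncio]', 'aiosqlite']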

@ -48,7 +48,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/trace_store.db
eval:

View file

@ -47,7 +47,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/trace_store.db
eval:

View file

@ -53,7 +53,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/trace_store.db
eval:

View file

@ -48,7 +48,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/trace_store.db
eval:

View file

@ -1,40 +0,0 @@
version: '2'
distribution_spec:
description: Distribution for running e2e tests in CI
providers:
inference:
- remote::openai
- remote::fireworks-openai-compat
- remote::together-openai-compat
- remote::groq-openai-compat
- remote::sambanova-openai-compat
- remote::cerebras-openai-compat
- inline::sentence-transformers
vector_io:
- inline::sqlite-vec
- remote::chromadb
- remote::pgvector
safety:
- inline::llama-guard
agents:
- inline::meta-reference
telemetry:
- inline::meta-reference
eval:
- inline::meta-reference
datasetio:
- remote::huggingface
- inline::localfs
scoring:
- inline::basic
- inline::llm-as-judge
- inline::braintrust
tool_runtime:
- remote::brave-search
- remote::tavily-search
- inline::rag-runtime
- remote::model-context-protocol
image_type: conda
additional_pip_packages:
- aiosqlite
- sqlalchemy[asyncio]

View file

@ -1,731 +0,0 @@
version: '2'
image_name: verification
apis:
- agents
- datasetio
- eval
- inference
- safety
- scoring
- telemetry
- tool_runtime
- vector_io
providers:
inference:
- provider_id: openai
provider_type: remote::openai
config:
api_key: ${env.OPENAI_API_KEY:}
- provider_id: fireworks-openai-compat
provider_type: remote::fireworks-openai-compat
config:
openai_compat_api_base: https://api.fireworks.ai/inference/v1
api_key: ${env.FIREWORKS_API_KEY:}
- provider_id: together-openai-compat
provider_type: remote::together-openai-compat
config:
openai_compat_api_base: https://api.together.xyz/v1
api_key: ${env.TOGETHER_API_KEY:}
- provider_id: groq-openai-compat
provider_type: remote::groq-openai-compat
config:
openai_compat_api_base: https://api.groq.com/openai/v1
api_key: ${env.GROQ_API_KEY:}
- provider_id: sambanova-openai-compat
provider_type: remote::sambanova-openai-compat
config:
openai_compat_api_base: https://api.sambanova.ai/v1
api_key: ${env.SAMBANOVA_API_KEY:}
- provider_id: cerebras-openai-compat
provider_type: remote::cerebras-openai-compat
config:
openai_compat_api_base: https://api.cerebras.ai/v1
api_key: ${env.CEREBRAS_API_KEY:}
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
config: {}
vector_io:
- provider_id: sqlite-vec
provider_type: inline::sqlite-vec
config:
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/sqlite_vec.db
- provider_id: ${env.ENABLE_CHROMADB+chromadb}
provider_type: remote::chromadb
config:
url: ${env.CHROMADB_URL:}
- provider_id: ${env.ENABLE_PGVECTOR+pgvector}
provider_type: remote::pgvector
config:
host: ${env.PGVECTOR_HOST:localhost}
port: ${env.PGVECTOR_PORT:5432}
db: ${env.PGVECTOR_DB:}
user: ${env.PGVECTOR_USER:}
password: ${env.PGVECTOR_PASSWORD:}
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config:
excluded_categories: []
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/agents_store.db
responses_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/responses_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/trace_store.db
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/localfs_datasetio.db
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
config: {}
- provider_id: braintrust
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:}
max_results: 3
- provider_id: rag-runtime
provider_type: inline::rag-runtime
config: {}
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
config: {}
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/inference_store.db
models:
- metadata: {}
model_id: openai/gpt-4o
provider_id: openai
provider_model_id: openai/gpt-4o
model_type: llm
- metadata: {}
model_id: openai/gpt-4o-mini
provider_id: openai
provider_model_id: openai/gpt-4o-mini
model_type: llm
- metadata: {}
model_id: openai/chatgpt-4o-latest
provider_id: openai
provider_model_id: openai/chatgpt-4o-latest
model_type: llm
- metadata: {}
model_id: gpt-3.5-turbo-0125
provider_id: openai
provider_model_id: gpt-3.5-turbo-0125
model_type: llm
- metadata: {}
model_id: gpt-3.5-turbo
provider_id: openai
provider_model_id: gpt-3.5-turbo
model_type: llm
- metadata: {}
model_id: gpt-3.5-turbo-instruct
provider_id: openai
provider_model_id: gpt-3.5-turbo-instruct
model_type: llm
- metadata: {}
model_id: gpt-4
provider_id: openai
provider_model_id: gpt-4
model_type: llm
- metadata: {}
model_id: gpt-4-turbo
provider_id: openai
provider_model_id: gpt-4-turbo
model_type: llm
- metadata: {}
model_id: gpt-4o
provider_id: openai
provider_model_id: gpt-4o
model_type: llm
- metadata: {}
model_id: gpt-4o-2024-08-06
provider_id: openai
provider_model_id: gpt-4o-2024-08-06
model_type: llm
- metadata: {}
model_id: gpt-4o-mini
provider_id: openai
provider_model_id: gpt-4o-mini
model_type: llm
- metadata: {}
model_id: gpt-4o-audio-preview
provider_id: openai
provider_model_id: gpt-4o-audio-preview
model_type: llm
- metadata: {}
model_id: chatgpt-4o-latest
provider_id: openai
provider_model_id: chatgpt-4o-latest
model_type: llm
- metadata: {}
model_id: o1
provider_id: openai
provider_model_id: o1
model_type: llm
- metadata: {}
model_id: o1-mini
provider_id: openai
provider_model_id: o1-mini
model_type: llm
- metadata: {}
model_id: o3-mini
provider_id: openai
provider_model_id: o3-mini
model_type: llm
- metadata: {}
model_id: o4-mini
provider_id: openai
provider_model_id: o4-mini
model_type: llm
- metadata:
embedding_dimension: 1536
context_length: 8192
model_id: openai/text-embedding-3-small
provider_id: openai
provider_model_id: openai/text-embedding-3-small
model_type: embedding
- metadata:
embedding_dimension: 3072
context_length: 8192
model_id: openai/text-embedding-3-large
provider_id: openai
provider_model_id: openai/text-embedding-3-large
model_type: embedding
- metadata:
embedding_dimension: 1536
context_length: 8192
model_id: text-embedding-3-small
provider_id: openai
provider_model_id: text-embedding-3-small
model_type: embedding
- metadata:
embedding_dimension: 3072
context_length: 8192
model_id: text-embedding-3-large
provider_id: openai
provider_model_id: text-embedding-3-large
model_type: embedding
- metadata: {}
model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
provider_id: fireworks-openai-compat
provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-8B-Instruct
provider_id: fireworks-openai-compat
provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
model_type: llm
- metadata: {}
model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
provider_id: fireworks-openai-compat
provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-70B-Instruct
provider_id: fireworks-openai-compat
provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
model_type: llm
- metadata: {}
model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
provider_id: fireworks-openai-compat
provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
provider_id: fireworks-openai-compat
provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
model_type: llm
- metadata: {}
model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
provider_id: fireworks-openai-compat
provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-3B-Instruct
provider_id: fireworks-openai-compat
provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
model_type: llm
- metadata: {}
model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
provider_id: fireworks-openai-compat
provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
provider_id: fireworks-openai-compat
provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
model_type: llm
- metadata: {}
model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
provider_id: fireworks-openai-compat
provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
provider_id: fireworks-openai-compat
provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
model_type: llm
- metadata: {}
model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
provider_id: fireworks-openai-compat
provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.3-70B-Instruct
provider_id: fireworks-openai-compat
provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
model_type: llm
- metadata: {}
model_id: accounts/fireworks/models/llama-guard-3-8b
provider_id: fireworks-openai-compat
provider_model_id: accounts/fireworks/models/llama-guard-3-8b
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-Guard-3-8B
provider_id: fireworks-openai-compat
provider_model_id: accounts/fireworks/models/llama-guard-3-8b
model_type: llm
- metadata: {}
model_id: accounts/fireworks/models/llama-guard-3-11b-vision
provider_id: fireworks-openai-compat
provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-Guard-3-11B-Vision
provider_id: fireworks-openai-compat
provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
model_type: llm
- metadata: {}
model_id: accounts/fireworks/models/llama4-scout-instruct-basic
provider_id: fireworks-openai-compat
provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
provider_id: fireworks-openai-compat
provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic
model_type: llm
- metadata: {}
model_id: accounts/fireworks/models/llama4-maverick-instruct-basic
provider_id: fireworks-openai-compat
provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
provider_id: fireworks-openai-compat
provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic
model_type: llm
- metadata:
embedding_dimension: 768
context_length: 8192
model_id: nomic-ai/nomic-embed-text-v1.5
provider_id: fireworks-openai-compat
provider_model_id: nomic-ai/nomic-embed-text-v1.5
model_type: embedding
- metadata: {}
model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
provider_id: together-openai-compat
provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-8B-Instruct
provider_id: together-openai-compat
provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
provider_id: together-openai-compat
provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-70B-Instruct
provider_id: together-openai-compat
provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
provider_id: together-openai-compat
provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
provider_id: together-openai-compat
provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
provider_id: together-openai-compat
provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-3B-Instruct
provider_id: together-openai-compat
provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
provider_id: together-openai-compat
provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
provider_id: together-openai-compat
provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
provider_id: together-openai-compat
provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
provider_id: together-openai-compat
provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
provider_id: together-openai-compat
provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.3-70B-Instruct
provider_id: together-openai-compat
provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: meta-llama/Meta-Llama-Guard-3-8B
provider_id: together-openai-compat
provider_model_id: meta-llama/Meta-Llama-Guard-3-8B
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-Guard-3-8B
provider_id: together-openai-compat
provider_model_id: meta-llama/Meta-Llama-Guard-3-8B
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
provider_id: together-openai-compat
provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-Guard-3-11B-Vision
provider_id: together-openai-compat
provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
model_type: llm
- metadata:
embedding_dimension: 768
context_length: 8192
model_id: togethercomputer/m2-bert-80M-8k-retrieval
provider_id: together-openai-compat
provider_model_id: togethercomputer/m2-bert-80M-8k-retrieval
model_type: embedding
- metadata:
embedding_dimension: 768
context_length: 32768
model_id: togethercomputer/m2-bert-80M-32k-retrieval
provider_id: together-openai-compat
provider_model_id: togethercomputer/m2-bert-80M-32k-retrieval
model_type: embedding
- metadata: {}
model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
provider_id: together-openai-compat
provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
model_type: llm
- metadata: {}
model_id: together/meta-llama/Llama-4-Scout-17B-16E-Instruct
provider_id: together-openai-compat
provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
provider_id: together-openai-compat
provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
provider_id: together-openai-compat
provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
model_type: llm
- metadata: {}
model_id: together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
provider_id: together-openai-compat
provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
model_type: llm
- metadata: {}
model_id: groq/llama3-8b-8192
provider_id: groq-openai-compat
provider_model_id: groq/llama3-8b-8192
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-8B-Instruct
provider_id: groq-openai-compat
provider_model_id: groq/llama3-8b-8192
model_type: llm
- metadata: {}
model_id: groq/llama-3.1-8b-instant
provider_id: groq-openai-compat
provider_model_id: groq/llama-3.1-8b-instant
model_type: llm
- metadata: {}
model_id: groq/llama3-70b-8192
provider_id: groq-openai-compat
provider_model_id: groq/llama3-70b-8192
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3-70B-Instruct
provider_id: groq-openai-compat
provider_model_id: groq/llama3-70b-8192
model_type: llm
- metadata: {}
model_id: groq/llama-3.3-70b-versatile
provider_id: groq-openai-compat
provider_model_id: groq/llama-3.3-70b-versatile
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.3-70B-Instruct
provider_id: groq-openai-compat
provider_model_id: groq/llama-3.3-70b-versatile
model_type: llm
- metadata: {}
model_id: groq/llama-3.2-3b-preview
provider_id: groq-openai-compat
provider_model_id: groq/llama-3.2-3b-preview
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-3B-Instruct
provider_id: groq-openai-compat
provider_model_id: groq/llama-3.2-3b-preview
model_type: llm
- metadata: {}
model_id: groq/llama-4-scout-17b-16e-instruct
provider_id: groq-openai-compat
provider_model_id: groq/llama-4-scout-17b-16e-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
provider_id: groq-openai-compat
provider_model_id: groq/llama-4-scout-17b-16e-instruct
model_type: llm
- metadata: {}
model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
provider_id: groq-openai-compat
provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
provider_id: groq-openai-compat
provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
model_type: llm
- metadata: {}
model_id: groq/llama-4-maverick-17b-128e-instruct
provider_id: groq-openai-compat
provider_model_id: groq/llama-4-maverick-17b-128e-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
provider_id: groq-openai-compat
provider_model_id: groq/llama-4-maverick-17b-128e-instruct
model_type: llm
- metadata: {}
model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
provider_id: groq-openai-compat
provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
provider_id: groq-openai-compat
provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
model_type: llm
- metadata: {}
model_id: sambanova/Meta-Llama-3.1-8B-Instruct
provider_id: sambanova-openai-compat
provider_model_id: sambanova/Meta-Llama-3.1-8B-Instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-8B-Instruct
provider_id: sambanova-openai-compat
provider_model_id: sambanova/Meta-Llama-3.1-8B-Instruct
model_type: llm
- metadata: {}
model_id: sambanova/Meta-Llama-3.1-405B-Instruct
provider_id: sambanova-openai-compat
provider_model_id: sambanova/Meta-Llama-3.1-405B-Instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
provider_id: sambanova-openai-compat
provider_model_id: sambanova/Meta-Llama-3.1-405B-Instruct
model_type: llm
- metadata: {}
model_id: sambanova/Meta-Llama-3.2-1B-Instruct
provider_id: sambanova-openai-compat
provider_model_id: sambanova/Meta-Llama-3.2-1B-Instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-1B-Instruct
provider_id: sambanova-openai-compat
provider_model_id: sambanova/Meta-Llama-3.2-1B-Instruct
model_type: llm
- metadata: {}
model_id: sambanova/Meta-Llama-3.2-3B-Instruct
provider_id: sambanova-openai-compat
provider_model_id: sambanova/Meta-Llama-3.2-3B-Instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-3B-Instruct
provider_id: sambanova-openai-compat
provider_model_id: sambanova/Meta-Llama-3.2-3B-Instruct
model_type: llm
- metadata: {}
model_id: sambanova/Meta-Llama-3.3-70B-Instruct
provider_id: sambanova-openai-compat
provider_model_id: sambanova/Meta-Llama-3.3-70B-Instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.3-70B-Instruct
provider_id: sambanova-openai-compat
provider_model_id: sambanova/Meta-Llama-3.3-70B-Instruct
model_type: llm
- metadata: {}
model_id: sambanova/Llama-3.2-11B-Vision-Instruct
provider_id: sambanova-openai-compat
provider_model_id: sambanova/Llama-3.2-11B-Vision-Instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
provider_id: sambanova-openai-compat
provider_model_id: sambanova/Llama-3.2-11B-Vision-Instruct
model_type: llm
- metadata: {}
model_id: sambanova/Llama-3.2-90B-Vision-Instruct
provider_id: sambanova-openai-compat
provider_model_id: sambanova/Llama-3.2-90B-Vision-Instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
provider_id: sambanova-openai-compat
provider_model_id: sambanova/Llama-3.2-90B-Vision-Instruct
model_type: llm
- metadata: {}
model_id: sambanova/Llama-4-Scout-17B-16E-Instruct
provider_id: sambanova-openai-compat
provider_model_id: sambanova/Llama-4-Scout-17B-16E-Instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
provider_id: sambanova-openai-compat
provider_model_id: sambanova/Llama-4-Scout-17B-16E-Instruct
model_type: llm
- metadata: {}
model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct
provider_id: sambanova-openai-compat
provider_model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
provider_id: sambanova-openai-compat
provider_model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct
model_type: llm
- metadata: {}
model_id: sambanova/Meta-Llama-Guard-3-8B
provider_id: sambanova-openai-compat
provider_model_id: sambanova/Meta-Llama-Guard-3-8B
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-Guard-3-8B
provider_id: sambanova-openai-compat
provider_model_id: sambanova/Meta-Llama-Guard-3-8B
model_type: llm
- metadata: {}
model_id: llama3.1-8b
provider_id: cerebras-openai-compat
provider_model_id: llama3.1-8b
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-8B-Instruct
provider_id: cerebras-openai-compat
provider_model_id: llama3.1-8b
model_type: llm
- metadata: {}
model_id: llama-3.3-70b
provider_id: cerebras-openai-compat
provider_model_id: llama-3.3-70b
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.3-70B-Instruct
provider_id: cerebras-openai-compat
provider_model_id: llama-3.3-70b
model_type: llm
- metadata:
embedding_dimension: 384
model_id: all-MiniLM-L6-v2
provider_id: sentence-transformers
model_type: embedding
shields:
- shield_id: meta-llama/Llama-Guard-3-8B
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
server:
port: 8321
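
Each models entry above binds a user-facing model_id to the provider_model_id the provider actually serves, which is why most models appear twice: once under the provider's native name (for example the Together Turbo IDs) and once under the canonical meta-llama alias. A minimal lookup sketch, assuming the file has been parsed with PyYAML; the helper name is illustrative:

import yaml

def resolve_provider_model(run_config_path: str, model_id: str) -> tuple[str, str]:
    # Hypothetical helper: return (provider_id, provider_model_id) for an alias.
    # First match wins; the same alias can be registered against several
    # providers (e.g. Llama-3.1-8B-Instruct appears under together, groq,
    # sambanova and cerebras above).
    with open(run_config_path) as f:
        config = yaml.safe_load(f)
    for entry in config["models"]:
        if entry["model_id"] == model_id:
            return entry["provider_id"], entry["provider_model_id"]
    raise KeyError(f"model {model_id!r} is not registered")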

View file

@@ -1,201 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.apis.models.models import ModelType
from llama_stack.distribution.datatypes import (
ModelInput,
Provider,
ShieldInput,
ToolGroupInput,
)
from llama_stack.providers.inline.inference.sentence_transformers import (
SentenceTransformersInferenceConfig,
)
from llama_stack.providers.inline.vector_io.sqlite_vec.config import (
SQLiteVectorIOConfig,
)
from llama_stack.providers.remote.inference.cerebras.models import MODEL_ENTRIES as CEREBRAS_MODEL_ENTRIES
from llama_stack.providers.remote.inference.cerebras_openai_compat.config import CerebrasCompatConfig
from llama_stack.providers.remote.inference.fireworks.models import (
MODEL_ENTRIES as FIREWORKS_MODEL_ENTRIES,
)
from llama_stack.providers.remote.inference.fireworks_openai_compat.config import FireworksCompatConfig
from llama_stack.providers.remote.inference.groq.models import (
MODEL_ENTRIES as GROQ_MODEL_ENTRIES,
)
from llama_stack.providers.remote.inference.groq_openai_compat.config import GroqCompatConfig
from llama_stack.providers.remote.inference.openai.config import OpenAIConfig
from llama_stack.providers.remote.inference.openai.models import (
MODEL_ENTRIES as OPENAI_MODEL_ENTRIES,
)
from llama_stack.providers.remote.inference.sambanova.models import MODEL_ENTRIES as SAMBANOVA_MODEL_ENTRIES
from llama_stack.providers.remote.inference.sambanova_openai_compat.config import SambaNovaCompatConfig
from llama_stack.providers.remote.inference.together.models import (
MODEL_ENTRIES as TOGETHER_MODEL_ENTRIES,
)
from llama_stack.providers.remote.inference.together_openai_compat.config import TogetherCompatConfig
from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig
from llama_stack.providers.remote.vector_io.pgvector.config import (
PGVectorVectorIOConfig,
)
from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
from llama_stack.templates.template import (
DistributionTemplate,
RunConfigSettings,
get_model_registry,
)


def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderModelEntry]]]:
# in this template, we allow each API key to be optional
providers = [
(
"openai",
OPENAI_MODEL_ENTRIES,
OpenAIConfig.sample_run_config(api_key="${env.OPENAI_API_KEY:}"),
),
(
"fireworks-openai-compat",
FIREWORKS_MODEL_ENTRIES,
FireworksCompatConfig.sample_run_config(api_key="${env.FIREWORKS_API_KEY:}"),
),
(
"together-openai-compat",
TOGETHER_MODEL_ENTRIES,
TogetherCompatConfig.sample_run_config(api_key="${env.TOGETHER_API_KEY:}"),
),
(
"groq-openai-compat",
GROQ_MODEL_ENTRIES,
GroqCompatConfig.sample_run_config(api_key="${env.GROQ_API_KEY:}"),
),
(
"sambanova-openai-compat",
SAMBANOVA_MODEL_ENTRIES,
SambaNovaCompatConfig.sample_run_config(api_key="${env.SAMBANOVA_API_KEY:}"),
),
(
"cerebras-openai-compat",
CEREBRAS_MODEL_ENTRIES,
CerebrasCompatConfig.sample_run_config(api_key="${env.CEREBRAS_API_KEY:}"),
),
]
inference_providers = []
available_models = {}
for provider_id, model_entries, config in providers:
inference_providers.append(
Provider(
provider_id=provider_id,
provider_type=f"remote::{provider_id}",
config=config,
)
)
available_models[provider_id] = model_entries
return inference_providers, available_models


def get_distribution_template() -> DistributionTemplate:
inference_providers, available_models = get_inference_providers()
providers = {
"inference": ([p.provider_type for p in inference_providers] + ["inline::sentence-transformers"]),
"vector_io": ["inline::sqlite-vec", "remote::chromadb", "remote::pgvector"],
"safety": ["inline::llama-guard"],
"agents": ["inline::meta-reference"],
"telemetry": ["inline::meta-reference"],
"eval": ["inline::meta-reference"],
"datasetio": ["remote::huggingface", "inline::localfs"],
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
"tool_runtime": [
"remote::brave-search",
"remote::tavily-search",
"inline::rag-runtime",
"remote::model-context-protocol",
],
}
name = "verification"
vector_io_providers = [
Provider(
provider_id="sqlite-vec",
provider_type="inline::sqlite-vec",
config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
),
Provider(
provider_id="${env.ENABLE_CHROMADB+chromadb}",
provider_type="remote::chromadb",
config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:}"),
),
Provider(
provider_id="${env.ENABLE_PGVECTOR+pgvector}",
provider_type="remote::pgvector",
config=PGVectorVectorIOConfig.sample_run_config(
db="${env.PGVECTOR_DB:}",
user="${env.PGVECTOR_USER:}",
password="${env.PGVECTOR_PASSWORD:}",
),
),
]
embedding_provider = Provider(
provider_id="sentence-transformers",
provider_type="inline::sentence-transformers",
config=SentenceTransformersInferenceConfig.sample_run_config(),
)
default_tool_groups = [
ToolGroupInput(
toolgroup_id="builtin::websearch",
provider_id="tavily-search",
),
ToolGroupInput(
toolgroup_id="builtin::rag",
provider_id="rag-runtime",
),
]
embedding_model = ModelInput(
model_id="all-MiniLM-L6-v2",
provider_id=embedding_provider.provider_id,
model_type=ModelType.embedding,
metadata={
"embedding_dimension": 384,
},
)
default_models = get_model_registry(available_models)
return DistributionTemplate(
name=name,
distro_type="self_hosted",
description="Distribution for running e2e tests in CI",
container_image=None,
template_path=None,
providers=providers,
available_models_by_provider=available_models,
run_configs={
"run.yaml": RunConfigSettings(
provider_overrides={
"inference": inference_providers + [embedding_provider],
"vector_io": vector_io_providers,
},
default_models=default_models + [embedding_model],
default_tool_groups=default_tool_groups,
default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
),
},
run_config_env_vars={
"LLAMA_STACK_PORT": (
"8321",
"Port for the Llama Stack distribution server",
),
"FIREWORKS_API_KEY": (
"",
"Fireworks API Key",
),
"OPENAI_API_KEY": (
"",
"OpenAI API Key",
),
},
)
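
The deleted template above pairs each OpenAI-compat provider with its MODEL_ENTRIES list and hands the result to get_model_registry, which produces the models: section of the generated run.yaml. A rough sketch of that flattening, assuming each entry carries a provider_model_id plus optional aliases (the real ProviderModelEntry fields may differ):

def flatten_model_registry(available_models: dict[str, list]) -> list[dict]:
    # Hedged sketch: emit one run.yaml row per (name, provider) pair, mirroring
    # the paired provider-native / canonical-alias blocks in the config above.
    rows = []
    for provider_id, entries in available_models.items():
        for entry in entries:
            names = [entry.provider_model_id, *getattr(entry, "aliases", [])]
            for name in names:
                rows.append({
                    "metadata": getattr(entry, "metadata", None) or {},
                    "model_id": name,
                    "provider_id": provider_id,
                    "provider_model_id": entry.provider_model_id,
                    "model_type": getattr(entry, "model_type", "llm"),
                })
    return rows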

View file

@@ -52,7 +52,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/trace_store.db
eval:

View file

@@ -49,7 +49,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/trace_store.db
eval:
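
Both telemetry hunks quote service_name and replace the empty default with a zero-width space (\u200B), presumably so the ${env.VAR:default} substitution yields a non-empty string when OTEL_SERVICE_NAME is unset, rather than a value some YAML consumers read as null. A small demonstration of that substitution pattern; the regex is illustrative, not the stack's actual parser:

import os
import re

_PATTERN = re.compile(r"\$\{env\.(\w+):([^}]*)\}")

def substitute(value: str) -> str:
    # Replace ${env.NAME:default} with the env var if set, else the default.
    return _PATTERN.sub(lambda m: os.environ.get(m.group(1), m.group(2)), value)

# With the old form the default is "", while "\u200b" stays a non-empty
# string that still renders as nothing.
print(repr(substitute("${env.OTEL_SERVICE_NAME:\u200b}")))  # '\u200b' when unset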

View file

@@ -0,0 +1,105 @@
import { NextRequest, NextResponse } from "next/server";

// Get backend URL from environment variable or default to localhost for development
const BACKEND_URL =
process.env.LLAMA_STACK_BACKEND_URL ||
`http://localhost:${process.env.LLAMA_STACK_PORT || 8321}`;

async function proxyRequest(request: NextRequest, method: string) {
try {
// Extract the path from the request URL
const url = new URL(request.url);
const pathSegments = url.pathname.split("/");
// Remove /api from the path to get the actual API path
// /api/v1/models/list -> /v1/models/list
const apiPath = pathSegments.slice(2).join("/"); // Remove 'api' segment
const targetUrl = `${BACKEND_URL}/${apiPath}${url.search}`;
console.log(`Proxying ${method} ${url.pathname} -> ${targetUrl}`);
// Prepare headers (exclude host and other problematic headers)
const headers = new Headers();
request.headers.forEach((value, key) => {
// Skip headers that might cause issues in proxy
if (
!["host", "connection", "content-length"].includes(key.toLowerCase())
) {
headers.set(key, value);
}
});
// Prepare the request options
const requestOptions: RequestInit = {
method,
headers,
};
// Add body for methods that support it
if (["POST", "PUT", "PATCH"].includes(method) && request.body) {
requestOptions.body = await request.text();
}
// Make the request to the FastAPI backend
const response = await fetch(targetUrl, requestOptions);
// Get response data
const responseText = await response.text();
console.log(
`Response from FastAPI: ${response.status} ${response.statusText}`,
);
// Create response with same status and headers
const proxyResponse = new NextResponse(responseText, {
status: response.status,
statusText: response.statusText,
});
// Copy response headers (except problematic ones)
response.headers.forEach((value, key) => {
if (!["connection", "transfer-encoding"].includes(key.toLowerCase())) {
proxyResponse.headers.set(key, value);
}
});
return proxyResponse;
} catch (error) {
console.error("Proxy request failed:", error);
return NextResponse.json(
{
error: "Proxy request failed",
message: error instanceof Error ? error.message : "Unknown error",
backend_url: BACKEND_URL,
timestamp: new Date().toISOString(),
},
{ status: 500 },
);
}
}

// HTTP method handlers
export async function GET(request: NextRequest) {
  return proxyRequest(request, "GET");
}

export async function POST(request: NextRequest) {
  return proxyRequest(request, "POST");
}

export async function PUT(request: NextRequest) {
  return proxyRequest(request, "PUT");
}

export async function DELETE(request: NextRequest) {
  return proxyRequest(request, "DELETE");
}

export async function PATCH(request: NextRequest) {
  return proxyRequest(request, "PATCH");
}

export async function OPTIONS(request: NextRequest) {
  return proxyRequest(request, "OPTIONS");
}
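
The handlers above all funnel into proxyRequest, which drops the leading /api segment, so /api/v1/models on the UI origin is forwarded to ${BACKEND_URL}/v1/models. A quick smoke test from Python, assuming the Next.js dev server is running on its default port 3000:

import json
import urllib.request

# Goes through the Next.js proxy; the route rewrites /api/v1/models to
# ${BACKEND_URL}/v1/models before forwarding. The response shape depends
# on the stack server, so we just print the parsed JSON.
with urllib.request.urlopen("http://localhost:3000/api/v1/models") as resp:
    print(json.load(resp))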

View file

@@ -1,12 +1,6 @@
import LlamaStackClient from "llama-stack-client";
import OpenAI from "openai";
export const client =
process.env.NEXT_PUBLIC_USE_OPENAI_CLIENT === "true" // useful for testing
? new OpenAI({
apiKey: process.env.NEXT_PUBLIC_OPENAI_API_KEY,
dangerouslyAllowBrowser: true,
})
: new LlamaStackClient({
baseURL: process.env.NEXT_PUBLIC_LLAMA_STACK_BASE_URL,
});
export const client = new LlamaStackClient({
baseURL:
typeof window !== "undefined" ? `${window.location.origin}/api` : "/api",
});
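
With this change the browser always talks to its own origin under /api, and the proxy route holds the only copy of the backend address (the server-side LLAMA_STACK_BACKEND_URL), removing the OpenAI fallback and the NEXT_PUBLIC_* base-URL knobs and sidestepping cross-origin configuration for the UI.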

View file

@@ -15,11 +15,10 @@
"@radix-ui/react-tooltip": "^1.2.6",
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
"llama-stack-client": "github:stainless-sdks/llama-stack-node#ehhuang/dev",
"llama-stack-client": "0.2.9",
"lucide-react": "^0.510.0",
"next": "15.3.2",
"next-themes": "^0.4.6",
"openai": "^4.103.0",
"react": "^19.0.0",
"react-dom": "^19.0.0",
"tailwind-merge": "^3.3.0"
@@ -677,6 +676,406 @@
"tslib": "^2.4.0"
}
},
"node_modules/@esbuild/aix-ppc64": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.25.5.tgz",
"integrity": "sha512-9o3TMmpmftaCMepOdA5k/yDw8SfInyzWWTjYTFCX3kPSDJMROQTb8jg+h9Cnwnmm1vOzvxN7gIfB5V2ewpjtGA==",
"cpu": [
"ppc64"
],
"license": "MIT",
"optional": true,
"os": [
"aix"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/android-arm": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.25.5.tgz",
"integrity": "sha512-AdJKSPeEHgi7/ZhuIPtcQKr5RQdo6OO2IL87JkianiMYMPbCtot9fxPbrMiBADOWWm3T2si9stAiVsGbTQFkbA==",
"cpu": [
"arm"
],
"license": "MIT",
"optional": true,
"os": [
"android"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/android-arm64": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.25.5.tgz",
"integrity": "sha512-VGzGhj4lJO+TVGV1v8ntCZWJktV7SGCs3Pn1GRWI1SBFtRALoomm8k5E9Pmwg3HOAal2VDc2F9+PM/rEY6oIDg==",
"cpu": [
"arm64"
],
"license": "MIT",
"optional": true,
"os": [
"android"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/android-x64": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.25.5.tgz",
"integrity": "sha512-D2GyJT1kjvO//drbRT3Hib9XPwQeWd9vZoBJn+bu/lVsOZ13cqNdDeqIF/xQ5/VmWvMduP6AmXvylO/PIc2isw==",
"cpu": [
"x64"
],
"license": "MIT",
"optional": true,
"os": [
"android"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/darwin-arm64": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.25.5.tgz",
"integrity": "sha512-GtaBgammVvdF7aPIgH2jxMDdivezgFu6iKpmT+48+F8Hhg5J/sfnDieg0aeG/jfSvkYQU2/pceFPDKlqZzwnfQ==",
"cpu": [
"arm64"
],
"license": "MIT",
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/darwin-x64": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.25.5.tgz",
"integrity": "sha512-1iT4FVL0dJ76/q1wd7XDsXrSW+oLoquptvh4CLR4kITDtqi2e/xwXwdCVH8hVHU43wgJdsq7Gxuzcs6Iq/7bxQ==",
"cpu": [
"x64"
],
"license": "MIT",
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/freebsd-arm64": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.25.5.tgz",
"integrity": "sha512-nk4tGP3JThz4La38Uy/gzyXtpkPW8zSAmoUhK9xKKXdBCzKODMc2adkB2+8om9BDYugz+uGV7sLmpTYzvmz6Sw==",
"cpu": [
"arm64"
],
"license": "MIT",
"optional": true,
"os": [
"freebsd"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/freebsd-x64": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.25.5.tgz",
"integrity": "sha512-PrikaNjiXdR2laW6OIjlbeuCPrPaAl0IwPIaRv+SMV8CiM8i2LqVUHFC1+8eORgWyY7yhQY+2U2fA55mBzReaw==",
"cpu": [
"x64"
],
"license": "MIT",
"optional": true,
"os": [
"freebsd"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/linux-arm": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.25.5.tgz",
"integrity": "sha512-cPzojwW2okgh7ZlRpcBEtsX7WBuqbLrNXqLU89GxWbNt6uIg78ET82qifUy3W6OVww6ZWobWub5oqZOVtwolfw==",
"cpu": [
"arm"
],
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/linux-arm64": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.25.5.tgz",
"integrity": "sha512-Z9kfb1v6ZlGbWj8EJk9T6czVEjjq2ntSYLY2cw6pAZl4oKtfgQuS4HOq41M/BcoLPzrUbNd+R4BXFyH//nHxVg==",
"cpu": [
"arm64"
],
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/linux-ia32": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.25.5.tgz",
"integrity": "sha512-sQ7l00M8bSv36GLV95BVAdhJ2QsIbCuCjh/uYrWiMQSUuV+LpXwIqhgJDcvMTj+VsQmqAHL2yYaasENvJ7CDKA==",
"cpu": [
"ia32"
],
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/linux-loong64": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.25.5.tgz",
"integrity": "sha512-0ur7ae16hDUC4OL5iEnDb0tZHDxYmuQyhKhsPBV8f99f6Z9KQM02g33f93rNH5A30agMS46u2HP6qTdEt6Q1kg==",
"cpu": [
"loong64"
],
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/linux-mips64el": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.25.5.tgz",
"integrity": "sha512-kB/66P1OsHO5zLz0i6X0RxlQ+3cu0mkxS3TKFvkb5lin6uwZ/ttOkP3Z8lfR9mJOBk14ZwZ9182SIIWFGNmqmg==",
"cpu": [
"mips64el"
],
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/linux-ppc64": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.25.5.tgz",
"integrity": "sha512-UZCmJ7r9X2fe2D6jBmkLBMQetXPXIsZjQJCjgwpVDz+YMcS6oFR27alkgGv3Oqkv07bxdvw7fyB71/olceJhkQ==",
"cpu": [
"ppc64"
],
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/linux-riscv64": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.25.5.tgz",
"integrity": "sha512-kTxwu4mLyeOlsVIFPfQo+fQJAV9mh24xL+y+Bm6ej067sYANjyEw1dNHmvoqxJUCMnkBdKpvOn0Ahql6+4VyeA==",
"cpu": [
"riscv64"
],
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/linux-s390x": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.25.5.tgz",
"integrity": "sha512-K2dSKTKfmdh78uJ3NcWFiqyRrimfdinS5ErLSn3vluHNeHVnBAFWC8a4X5N+7FgVE1EjXS1QDZbpqZBjfrqMTQ==",
"cpu": [
"s390x"
],
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/linux-x64": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.25.5.tgz",
"integrity": "sha512-uhj8N2obKTE6pSZ+aMUbqq+1nXxNjZIIjCjGLfsWvVpy7gKCOL6rsY1MhRh9zLtUtAI7vpgLMK6DxjO8Qm9lJw==",
"cpu": [
"x64"
],
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/netbsd-arm64": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/netbsd-arm64/-/netbsd-arm64-0.25.5.tgz",
"integrity": "sha512-pwHtMP9viAy1oHPvgxtOv+OkduK5ugofNTVDilIzBLpoWAM16r7b/mxBvfpuQDpRQFMfuVr5aLcn4yveGvBZvw==",
"cpu": [
"arm64"
],
"license": "MIT",
"optional": true,
"os": [
"netbsd"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/netbsd-x64": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.25.5.tgz",
"integrity": "sha512-WOb5fKrvVTRMfWFNCroYWWklbnXH0Q5rZppjq0vQIdlsQKuw6mdSihwSo4RV/YdQ5UCKKvBy7/0ZZYLBZKIbwQ==",
"cpu": [
"x64"
],
"license": "MIT",
"optional": true,
"os": [
"netbsd"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/openbsd-arm64": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.25.5.tgz",
"integrity": "sha512-7A208+uQKgTxHd0G0uqZO8UjK2R0DDb4fDmERtARjSHWxqMTye4Erz4zZafx7Di9Cv+lNHYuncAkiGFySoD+Mw==",
"cpu": [
"arm64"
],
"license": "MIT",
"optional": true,
"os": [
"openbsd"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/openbsd-x64": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.25.5.tgz",
"integrity": "sha512-G4hE405ErTWraiZ8UiSoesH8DaCsMm0Cay4fsFWOOUcz8b8rC6uCvnagr+gnioEjWn0wC+o1/TAHt+It+MpIMg==",
"cpu": [
"x64"
],
"license": "MIT",
"optional": true,
"os": [
"openbsd"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/sunos-x64": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.25.5.tgz",
"integrity": "sha512-l+azKShMy7FxzY0Rj4RCt5VD/q8mG/e+mDivgspo+yL8zW7qEwctQ6YqKX34DTEleFAvCIUviCFX1SDZRSyMQA==",
"cpu": [
"x64"
],
"license": "MIT",
"optional": true,
"os": [
"sunos"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/win32-arm64": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.25.5.tgz",
"integrity": "sha512-O2S7SNZzdcFG7eFKgvwUEZ2VG9D/sn/eIiz8XRZ1Q/DO5a3s76Xv0mdBzVM5j5R639lXQmPmSo0iRpHqUUrsxw==",
"cpu": [
"arm64"
],
"license": "MIT",
"optional": true,
"os": [
"win32"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/win32-ia32": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.25.5.tgz",
"integrity": "sha512-onOJ02pqs9h1iMJ1PQphR+VZv8qBMQ77Klcsqv9CNW2w6yLqoURLcgERAIurY6QE63bbLuqgP9ATqajFLK5AMQ==",
"cpu": [
"ia32"
],
"license": "MIT",
"optional": true,
"os": [
"win32"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/win32-x64": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.25.5.tgz",
"integrity": "sha512-TXv6YnJ8ZMVdX+SXWVBo/0p8LTcrUYngpWjvm91TMjjBQii7Oz11Lw5lbDV5Y0TzuhSJHwiH4hEtC1I42mMS0g==",
"cpu": [
"x64"
],
"license": "MIT",
"optional": true,
"os": [
"win32"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@eslint-community/eslint-utils": {
"version": "4.7.0",
"resolved": "https://registry.npmjs.org/@eslint-community/eslint-utils/-/eslint-utils-4.7.0.tgz",
@@ -5601,6 +6000,46 @@
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/esbuild": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.25.5.tgz",
"integrity": "sha512-P8OtKZRv/5J5hhz0cUAdu/cLuPIKXpQl1R9pZtvmHWQvrAUVd0UNIPT4IB4W3rNOqVO0rlqHmCIbSwxh/c9yUQ==",
"hasInstallScript": true,
"license": "MIT",
"bin": {
"esbuild": "bin/esbuild"
},
"engines": {
"node": ">=18"
},
"optionalDependencies": {
"@esbuild/aix-ppc64": "0.25.5",
"@esbuild/android-arm": "0.25.5",
"@esbuild/android-arm64": "0.25.5",
"@esbuild/android-x64": "0.25.5",
"@esbuild/darwin-arm64": "0.25.5",
"@esbuild/darwin-x64": "0.25.5",
"@esbuild/freebsd-arm64": "0.25.5",
"@esbuild/freebsd-x64": "0.25.5",
"@esbuild/linux-arm": "0.25.5",
"@esbuild/linux-arm64": "0.25.5",
"@esbuild/linux-ia32": "0.25.5",
"@esbuild/linux-loong64": "0.25.5",
"@esbuild/linux-mips64el": "0.25.5",
"@esbuild/linux-ppc64": "0.25.5",
"@esbuild/linux-riscv64": "0.25.5",
"@esbuild/linux-s390x": "0.25.5",
"@esbuild/linux-x64": "0.25.5",
"@esbuild/netbsd-arm64": "0.25.5",
"@esbuild/netbsd-x64": "0.25.5",
"@esbuild/openbsd-arm64": "0.25.5",
"@esbuild/openbsd-x64": "0.25.5",
"@esbuild/sunos-x64": "0.25.5",
"@esbuild/win32-arm64": "0.25.5",
"@esbuild/win32-ia32": "0.25.5",
"@esbuild/win32-x64": "0.25.5"
}
},
"node_modules/escalade": {
"version": "3.2.0",
"resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz",
@@ -6555,7 +6994,6 @@
"version": "2.3.3",
"resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz",
"integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==",
"dev": true,
"hasInstallScript": true,
"license": "MIT",
"optional": true,
@@ -6717,7 +7155,6 @@
"version": "4.10.0",
"resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.10.0.tgz",
"integrity": "sha512-kGzZ3LWWQcGIAmg6iWvXn0ei6WDtV26wzHRMwDSzmAbcXrTEXxHy6IehI6/4eT6VRKyMP1eF1VqwrVUmE/LR7A==",
"dev": true,
"license": "MIT",
"dependencies": {
"resolve-pkg-maps": "^1.0.0"
@@ -9092,8 +9529,9 @@
"license": "MIT"
},
"node_modules/llama-stack-client": {
"version": "0.0.1-alpha.0",
"resolved": "git+ssh://git@github.com/stainless-sdks/llama-stack-node.git#5d34d229fb53b6dad02da0f19f4b310b529c6b15",
"version": "0.2.9",
"resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.2.9.tgz",
"integrity": "sha512-7+2WuPYt2j/k/Twh5IGn8hd8q4W6lVEK+Ql4PpICGLj4N8YmooCfydI1UvdT2UlX7PNYKNeyeFqTifWT2MjWKg==",
"license": "Apache-2.0",
"dependencies": {
"@types/node": "^18.11.18",
@@ -9102,7 +9540,8 @@
"agentkeepalive": "^4.2.1",
"form-data-encoder": "1.7.2",
"formdata-node": "^4.3.2",
"node-fetch": "^2.6.7"
"node-fetch": "^2.6.7",
"tsx": "^4.19.2"
}
},
"node_modules/llama-stack-client/node_modules/@types/node": {
@@ -9805,51 +10244,6 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/openai": {
"version": "4.103.0",
"resolved": "https://registry.npmjs.org/openai/-/openai-4.103.0.tgz",
"integrity": "sha512-eWcz9kdurkGOFDtd5ySS5y251H2uBgq9+1a2lTBnjMMzlexJ40Am5t6Mu76SSE87VvitPa0dkIAp75F+dZVC0g==",
"license": "Apache-2.0",
"dependencies": {
"@types/node": "^18.11.18",
"@types/node-fetch": "^2.6.4",
"abort-controller": "^3.0.0",
"agentkeepalive": "^4.2.1",
"form-data-encoder": "1.7.2",
"formdata-node": "^4.3.2",
"node-fetch": "^2.6.7"
},
"bin": {
"openai": "bin/cli"
},
"peerDependencies": {
"ws": "^8.18.0",
"zod": "^3.23.8"
},
"peerDependenciesMeta": {
"ws": {
"optional": true
},
"zod": {
"optional": true
}
}
},
"node_modules/openai/node_modules/@types/node": {
"version": "18.19.103",
"resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.103.tgz",
"integrity": "sha512-hHTHp+sEz6SxFsp+SA+Tqrua3AbmlAw+Y//aEwdHrdZkYVRWdvWD3y5uPZ0flYOkgskaFWqZ/YGFm3FaFQ0pRw==",
"license": "MIT",
"dependencies": {
"undici-types": "~5.26.4"
}
},
"node_modules/openai/node_modules/undici-types": {
"version": "5.26.5",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",
"integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==",
"license": "MIT"
},
"node_modules/optionator": {
"version": "0.9.4",
"resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.4.tgz",
@@ -10631,7 +11025,6 @@
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz",
"integrity": "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==",
"dev": true,
"license": "MIT",
"funding": {
"url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1"
@@ -11682,6 +12075,25 @@
"integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==",
"license": "0BSD"
},
"node_modules/tsx": {
"version": "4.19.4",
"resolved": "https://registry.npmjs.org/tsx/-/tsx-4.19.4.tgz",
"integrity": "sha512-gK5GVzDkJK1SI1zwHf32Mqxf2tSJkNx+eYcNly5+nHvWqXUJYUkWBQtKauoESz3ymezAI++ZwT855x5p5eop+Q==",
"license": "MIT",
"dependencies": {
"esbuild": "~0.25.0",
"get-tsconfig": "^4.7.5"
},
"bin": {
"tsx": "dist/cli.mjs"
},
"engines": {
"node": ">=18.0.0"
},
"optionalDependencies": {
"fsevents": "~2.3.3"
}
},
"node_modules/tw-animate-css": {
"version": "1.2.9",
"resolved": "https://registry.npmjs.org/tw-animate-css/-/tw-animate-css-1.2.9.tgz",
@@ -12269,7 +12681,7 @@
"version": "8.18.2",
"resolved": "https://registry.npmjs.org/ws/-/ws-8.18.2.tgz",
"integrity": "sha512-DMricUmwGZUVr++AEAe2uiVM7UoO9MAVZMDu05UQOaUII0lp+zOzLLU4Xqh/JvTqklB1T4uELaaPBKyjE1r4fQ==",
"devOptional": true,
"dev": true,
"license": "MIT",
"engines": {
"node": ">=10.0.0"
@@ -12380,7 +12792,7 @@
"version": "3.24.4",
"resolved": "https://registry.npmjs.org/zod/-/zod-3.24.4.tgz",
"integrity": "sha512-OdqJE9UDRPwWsrHjLN2F8bPxvwJBK22EHLWtanu0LSYr5YqzsaaW3RMgmjwr8Rypg5k+meEJdSPXJZXE/yqOMg==",
"devOptional": true,
"dev": true,
"license": "MIT",
"funding": {
"url": "https://github.com/sponsors/colinhacks"