Merge-related changes.

2026-01-02 12:44:31 +00:00 · 2025-04-02 19:56:44 +02:00 · 2025-04-02 19:56:44 +02:00 · 60e9f46856
commit 60e9f46856
parent d38aea33c1 66d6c2580e
456 changed files with 38636 additions and 10892 deletions
--- a/llama_stack/distribution/access_control.py
+++ b/llama_stack/distribution/access_control.py
@ -0,0 +1,86 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Any, Dict, Optional
+
+from llama_stack.distribution.datatypes import AccessAttributes
+from llama_stack.log import get_logger
+
+logger = get_logger(__name__, category="core")
+
+
+def check_access(
+    obj_identifier: str,
+    obj_attributes: Optional[AccessAttributes],
+    user_attributes: Optional[Dict[str, Any]] = None,
+) -> bool:
+    """Check if the current user has access to the given object, based on access attributes.
+
+    Access control algorithm:
+    1. If the resource has no access_attributes, access is GRANTED to all authenticated users
+    2. If the user has no attributes, access is DENIED to any object with access_attributes defined
+    3. For each attribute category in the resource's access_attributes:
+       a. If the user lacks that category, access is DENIED
+       b. If the user has the category but none of the required values, access is DENIED
+       c. If the user has at least one matching value in each required category, access is GRANTED
+
+    A user attribute supplied as a bare string is treated as a single value and
+    compared by equality, never by substring.
+
+    Example:
+        # Resource requires:
+        access_attributes = AccessAttributes(
+            roles=["admin", "data-scientist"],
+            teams=["ml-team"]
+        )
+
+        # User has:
+        user_attributes = {
+            "roles": ["data-scientist", "engineer"],
+            "teams": ["ml-team", "infra-team"],
+            "projects": ["llama-3"]
+        }
+
+        # Result: Access GRANTED
+        # - User has the "data-scientist" role (matches one of the required roles)
+        # - AND user is part of the "ml-team" (matches the required team)
+        # - The extra "projects" attribute is ignored
+
+    Args:
+        obj_identifier: The identifier of the resource object to check access for
+        obj_attributes: The access attributes of the resource object
+        user_attributes: The attributes of the current user
+
+    Returns:
+        bool: True if access is granted, False if denied
+    """
+    # If object has no access attributes, allow access by default
+    if not obj_attributes:
+        return True
+
+    # If no user attributes, deny access to objects with access control
+    # NOTE(review): an AccessAttributes whose categories are all unset still reaches
+    # this check, so attribute-less users are denied for it - confirm this is intended.
+    if not user_attributes:
+        return False
+
+    # Only categories the resource actually sets (non-None) are enforced
+    dict_attribs = obj_attributes.model_dump(exclude_none=True)
+    if not dict_attribs:
+        return True
+
+    # Check each attribute category (requires ALL categories to match)
+    # TODO: formalize this into a proper ABAC policy
+    for attr_key, required_values in dict_attribs.items():
+        user_values = user_attributes.get(attr_key, [])
+
+        if not user_values:
+            logger.debug(f"Access denied to {obj_identifier}: missing required attribute category '{attr_key}'")
+            return False
+
+        # A bare string would turn `val in user_values` into a substring test
+        # (e.g. "admin" in "administrator"); wrap it so matching is by equality.
+        if isinstance(user_values, str):
+            user_values = [user_values]
+
+        if not any(val in user_values for val in required_values):
+            logger.debug(
+                f"Access denied to {obj_identifier}: "
+                f"no match for attribute '{attr_key}', required one of {required_values}"
+            )
+            return False
+
+    logger.debug(f"Access granted to {obj_identifier}")
+    return True
--- a/llama_stack/distribution/build.py
+++ b/llama_stack/distribution/build.py
@ -6,7 +6,6 @@

 import importlib.resources
 import logging
-import sys
 from pathlib import Path
 from typing import Dict, List

@ -15,7 +14,7 @@ from termcolor import cprint

 from llama_stack.distribution.datatypes import BuildConfig, Provider
 from llama_stack.distribution.distribution import get_provider_registry
-from llama_stack.distribution.utils.exec import run_command, run_with_pty
+from llama_stack.distribution.utils.exec import run_command
 from llama_stack.distribution.utils.image_types import LlamaStackImageType
 from llama_stack.providers.datatypes import Api

@ -123,11 +122,7 @@ def build_image(
    if special_deps:
        args.append("#".join(special_deps))

-    is_terminal = sys.stdin.isatty()
-    if is_terminal:
-        return_code = run_with_pty(args)
-    else:
-        return_code = run_command(args)
+    return_code = run_command(args)

    if return_code != 0:
        log.error(
--- a/llama_stack/distribution/build_container.sh
+++ b/llama_stack/distribution/build_container.sh
@ -43,7 +43,7 @@ RED='\033[0;31m'
 NC='\033[0m' # No Color

 CONTAINER_BINARY=${CONTAINER_BINARY:-docker}
-CONTAINER_OPTS=${CONTAINER_OPTS:-}
+CONTAINER_OPTS=${CONTAINER_OPTS:---progress=plain}

 TEMP_DIR=$(mktemp -d)

@ -90,6 +90,7 @@ RUN apt-get update && apt-get install -y \
       procps psmisc lsof \
       traceroute \
       bubblewrap \
+       gcc \
       && rm -rf /var/lib/apt/lists/*

 ENV UV_SYSTEM_PYTHON=1
@ -235,7 +236,7 @@ image_tag="$image_name:$version_tag"
 # Detect platform architecture
 ARCH=$(uname -m)
 if [ -n "$BUILD_PLATFORM" ]; then
-  CLI_ARGS+=("--platform $BUILD_PLATFORM")
+  CLI_ARGS+=("--platform" "$BUILD_PLATFORM")
 elif [ "$ARCH" = "arm64" ] || [ "$ARCH" = "aarch64" ]; then
  CLI_ARGS+=("--platform" "linux/arm64")
 elif [ "$ARCH" = "x86_64" ]; then
@ -253,8 +254,7 @@ $CONTAINER_BINARY build \
  "${CLI_ARGS[@]}" \
  -t "$image_tag" \
  -f "$TEMP_DIR/Containerfile" \
-  "." \
-  --progress=plain
+  "."

 # clean up tmp/configs
 set +x
--- a/llama_stack/distribution/configure.py
+++ b/llama_stack/distribution/configure.py
@ -62,7 +62,7 @@ def configure_api_providers(config: StackRunConfig, build_spec: DistributionSpec
    if config.apis:
        apis_to_serve = config.apis
    else:
-        apis_to_serve = [a.value for a in Api if a not in (Api.telemetry, Api.inspect)]
+        apis_to_serve = [a.value for a in Api if a not in (Api.telemetry, Api.inspect, Api.providers)]

    for api_str in apis_to_serve:
        api = Api(api_str)
--- a/llama_stack/distribution/datatypes.py
+++ b/llama_stack/distribution/datatypes.py
@ -16,6 +16,7 @@ from llama_stack.apis.inference import Inference
 from llama_stack.apis.models import Model, ModelInput
 from llama_stack.apis.preprocessing import Preprocessing, Preprocessor
 from llama_stack.apis.preprocessing.preprocessors import PreprocessorInput
+from llama_stack.apis.resource import Resource
 from llama_stack.apis.safety import Safety
 from llama_stack.apis.scoring import Scoring
 from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnInput
@ -33,6 +34,119 @@ LLAMA_STACK_RUN_CONFIG_VERSION = "2"
 RoutingKey = Union[str, List[str]]


+class AccessAttributes(BaseModel):
+    """Structured representation of user attributes for access control.
+
+    This model defines a structured approach to representing user attributes
+    with common standard categories for access control.
+
+    Standard attribute categories include:
+    - roles: Role-based attributes (e.g., admin, data-scientist)
+    - teams: Team-based attributes (e.g., ml-team, infra-team)
+    - projects: Project access attributes (e.g., llama-3, customer-insights)
+    - namespaces: Namespace-based access control for resource isolation
+    """
+
+    # Standard attribute categories - the minimal set we need now
+    # Each category is an optional list of strings and defaults to None (unset).
+    roles: Optional[List[str]] = Field(
+        default=None, description="Role-based attributes (e.g., 'admin', 'data-scientist', 'user')"
+    )
+
+    teams: Optional[List[str]] = Field(default=None, description="Team-based attributes (e.g., 'ml-team', 'nlp-team')")
+
+    projects: Optional[List[str]] = Field(
+        default=None, description="Project-based access attributes (e.g., 'llama-3', 'customer-insights')"
+    )
+
+    namespaces: Optional[List[str]] = Field(
+        default=None, description="Namespace-based access control for resource isolation"
+    )
+
+
+class ResourceWithACL(Resource):
+    """Extension of Resource that adds attribute-based access control capabilities.
+
+    This class adds an optional access_attributes field that allows fine-grained control
+    over which users can access each resource. When attributes are defined, a user must have
+    matching attributes to access the resource.
+
+    Attribute Matching Algorithm:
+    1. If a resource has no access_attributes (None or empty dict), it's visible to all authenticated users
+    2. Each key in access_attributes represents an attribute category (e.g., "roles", "teams", "projects")
+    3. The matching algorithm requires ALL categories to match (AND relationship between categories)
+    4. Within each category, ANY value match is sufficient (OR relationship within a category)
+
+    Examples:
+        # Resource visible to everyone (no access control)
+        model = Model(identifier="llama-2", ...)
+
+        # Resource visible only to admins
+        model = Model(
+            identifier="gpt-4",
+            access_attributes=AccessAttributes(roles=["admin"])
+        )
+
+        # Resource visible to data scientists on the ML team
+        model = Model(
+            identifier="private-model",
+            access_attributes=AccessAttributes(
+                roles=["data-scientist", "researcher"],
+                teams=["ml-team"]
+            )
+        )
+        # ^ User must have at least one of the roles AND be on the ml-team
+
+        # Resource visible to users with specific project access
+        vector_db = VectorDB(
+            identifier="customer-embeddings",
+            access_attributes=AccessAttributes(
+                projects=["customer-insights"],
+                namespaces=["confidential"]
+            )
+        )
+        # ^ User must have access to the customer-insights project AND have confidential namespace
+    """
+
+    # None (the default) leaves the resource without any access attributes.
+    access_attributes: Optional[AccessAttributes] = None
+
+
+# Use the extended Resource for all routable objects
+# Each class below combines an existing resource type with ResourceWithACL, which
+# contributes the optional `access_attributes` field. The bodies are intentionally
+# empty: nothing beyond the inherited fields is added here.
+class ModelWithACL(Model, ResourceWithACL):
+    pass
+
+
+class ShieldWithACL(Shield, ResourceWithACL):
+    pass
+
+
+class VectorDBWithACL(VectorDB, ResourceWithACL):
+    pass
+
+
+class DatasetWithACL(Dataset, ResourceWithACL):
+    pass
+
+
+class ScoringFnWithACL(ScoringFn, ResourceWithACL):
+    pass
+
+
+class BenchmarkWithACL(Benchmark, ResourceWithACL):
+    pass
+
+
+class ToolWithACL(Tool, ResourceWithACL):
+    pass
+
+
+class ToolGroupWithACL(ToolGroup, ResourceWithACL):
+    pass
+
+
+class PreprocessorWithACL(Preprocessor, ResourceWithACL):
+    pass
+
+
 RoutableObject = Union[
    Model,
    Shield,
@ -48,15 +162,15 @@ RoutableObject = Union[

 RoutableObjectWithProvider = Annotated[
    Union[
-        Model,
-        Shield,
-        VectorDB,
-        Dataset,
-        ScoringFn,
-        Benchmark,
-        Tool,
-        ToolGroup,
-        Preprocessor,
+        ModelWithACL,
+        ShieldWithACL,
+        VectorDBWithACL,
+        DatasetWithACL,
+        ScoringFnWithACL,
+        BenchmarkWithACL,
+        ToolWithACL,
+        ToolGroupWithACL,
+        PreprocessorWithACL,
    ],
    Field(discriminator="type"),
 ]
@ -122,6 +236,21 @@ class Provider(BaseModel):
    config: Dict[str, Any]


+class LoggingConfig(BaseModel):
+    # Maps a logging category name (ex: core, server) to its level string.
+    # The factory must be the builtin `dict`: `typing.Dict` is an annotation-only
+    # alias that raises TypeError when called, which would break LoggingConfig()
+    # whenever category_levels is omitted.
+    category_levels: Dict[str, str] = Field(
+        default_factory=dict,
+        description="""
 Dictionary of different logging configurations for different portions (ex: core, server) of llama stack""",
+    )
+
+
+class AuthenticationConfig(BaseModel):
+    # Required field: `...` as the Field default means no default value is supplied.
+    endpoint: str = Field(
+        ...,
+        description="Endpoint URL to validate authentication tokens",
+    )
+
+
 class ServerConfig(BaseModel):
    port: int = Field(
        default=8321,
@ -137,6 +266,10 @@ class ServerConfig(BaseModel):
        default=None,
        description="Path to TLS key file for HTTPS",
    )
+    auth: Optional[AuthenticationConfig] = Field(
+        default=None,
+        description="Authentication configuration for the server",
+    )


 class StackRunConfig(BaseModel):
@ -182,6 +315,8 @@ a default SQLite store will be used.""",
    tool_groups: List[ToolGroupInput] = Field(default_factory=list)
    preprocessors: List[PreprocessorInput] = Field(default_factory=list)

+    logging: Optional[LoggingConfig] = Field(default=None, description="Configuration for Llama Stack Logging")
+
    server: ServerConfig = Field(
        default_factory=ServerConfig,
        description="Configuration for the HTTP(S) server",
--- a/llama_stack/distribution/distribution.py
+++ b/llama_stack/distribution/distribution.py
@ -60,7 +60,7 @@ def builtin_automatically_routed_apis() -> List[AutoRoutedApiInfo]:

 def providable_apis() -> List[Api]:
    routing_table_apis = {x.routing_table_api for x in builtin_automatically_routed_apis()}
-    return [api for api in Api if api not in routing_table_apis and api != Api.inspect]
+    return [api for api in Api if api not in routing_table_apis and api != Api.inspect and api != Api.providers]


 def get_provider_registry() -> Dict[Api, Dict[str, ProviderSpec]]:
--- a/llama_stack/distribution/inspect.py
+++ b/llama_stack/distribution/inspect.py
@ -11,9 +11,7 @@ from pydantic import BaseModel
 from llama_stack.apis.inspect import (
    HealthInfo,
    Inspect,
-    ListProvidersResponse,
    ListRoutesResponse,
-    ProviderInfo,
    RouteInfo,
    VersionInfo,
 )
@ -39,24 +37,6 @@ class DistributionInspectImpl(Inspect):
    async def initialize(self) -> None:
        pass

-    async def list_providers(self) -> ListProvidersResponse:
-        run_config = self.config.run_config
-
-        ret = []
-        for api, providers in run_config.providers.items():
-            ret.extend(
-                [
-                    ProviderInfo(
-                        api=api,
-                        provider_id=p.provider_id,
-                        provider_type=p.provider_type,
-                    )
-                    for p in providers
-                ]
-            )
-
-        return ListProvidersResponse(data=ret)
-
    async def list_routes(self) -> ListRoutesResponse:
        run_config = self.config.run_config

--- a/llama_stack/distribution/library_client.py
+++ b/llama_stack/distribution/library_client.py
@ -9,7 +9,6 @@ import inspect
 import json
 import logging
 import os
-import re
 from concurrent.futures import ThreadPoolExecutor
 from enum import Enum
 from pathlib import Path
@ -33,19 +32,24 @@ from llama_stack.distribution.build import print_pip_install_help
 from llama_stack.distribution.configure import parse_and_maybe_upgrade_config
 from llama_stack.distribution.datatypes import Api
 from llama_stack.distribution.request_headers import (
-    preserve_headers_context_async_generator,
+    PROVIDER_DATA_VAR,
    request_provider_data_context,
 )
 from llama_stack.distribution.resolver import ProviderRegistry
-from llama_stack.distribution.server.endpoints import get_all_api_endpoints
+from llama_stack.distribution.server.endpoints import (
+    find_matching_endpoint,
+    initialize_endpoint_impls,
+)
 from llama_stack.distribution.stack import (
    construct_stack,
    get_stack_run_config_from_template,
    redact_sensitive_fields,
    replace_env_vars,
 )
+from llama_stack.distribution.utils.context import preserve_contexts_async_generator
 from llama_stack.distribution.utils.exec import in_notebook
 from llama_stack.providers.utils.telemetry.tracing import (
+    CURRENT_TRACE_CONTEXT,
    end_trace,
    setup_logger,
    start_trace,
@ -230,31 +234,7 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
            safe_config = redact_sensitive_fields(self.config.model_dump())
            console.print(yaml.dump(safe_config, indent=2))

-        endpoints = get_all_api_endpoints()
-        endpoint_impls = {}
-
-        def _convert_path_to_regex(path: str) -> str:
-            # Convert {param} to named capture groups
-            # handle {param:path} as well which allows for forward slashes in the param value
-            pattern = re.sub(
-                r"{(\w+)(?::path)?}",
-                lambda m: f"(?P<{m.group(1)}>{'[^/]+' if not m.group(0).endswith(':path') else '.+'})",
-                path,
-            )
-
-            return f"^{pattern}$"
-
-        for api, api_endpoints in endpoints.items():
-            if api not in self.impls:
-                continue
-            for endpoint in api_endpoints:
-                impl = self.impls[api]
-                func = getattr(impl, endpoint.name)
-                if endpoint.method not in endpoint_impls:
-                    endpoint_impls[endpoint.method] = {}
-                endpoint_impls[endpoint.method][_convert_path_to_regex(endpoint.route)] = func
-
-        self.endpoint_impls = endpoint_impls
+        self.endpoint_impls = initialize_endpoint_impls(self.impls)
        return True

    async def request(
@ -288,32 +268,6 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
                )
            return response

-    def _find_matching_endpoint(self, method: str, path: str) -> tuple[Any, dict]:
-        """Find the matching endpoint implementation for a given method and path.
-
-        Args:
-            method: HTTP method (GET, POST, etc.)
-            path: URL path to match against
-
-        Returns:
-            A tuple of (endpoint_function, path_params)
-
-        Raises:
-            ValueError: If no matching endpoint is found
-        """
-        impls = self.endpoint_impls.get(method)
-        if not impls:
-            raise ValueError(f"No endpoint found for {path}")
-
-        for regex, func in impls.items():
-            match = re.match(regex, path)
-            if match:
-                # Extract named groups from the regex match
-                path_params = match.groupdict()
-                return func, path_params
-
-        raise ValueError(f"No endpoint found for {path}")
-
    async def _call_non_streaming(
        self,
        *,
@ -324,10 +278,10 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
        body = options.params or {}
        body |= options.json_data or {}

-        matched_func, path_params = self._find_matching_endpoint(options.method, path)
+        matched_func, path_params, route = find_matching_endpoint(options.method, path, self.endpoint_impls)
        body |= path_params
        body = self._convert_body(path, options.method, body)
-        await start_trace(options.url, {"__location__": "library_client"})
+        await start_trace(route, {"__location__": "library_client"})
        try:
            result = await matched_func(**body)
        finally:
@ -369,13 +323,14 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
        path = options.url
        body = options.params or {}
        body |= options.json_data or {}
-        func, path_params = self._find_matching_endpoint(options.method, path)
+        func, path_params, route = find_matching_endpoint(options.method, path, self.endpoint_impls)
        body |= path_params

        body = self._convert_body(path, options.method, body)

+        await start_trace(route, {"__location__": "library_client"})
+
        async def gen():
-            await start_trace(options.url, {"__location__": "library_client"})
            try:
                async for chunk in await func(**body):
                    data = json.dumps(convert_pydantic_to_json_value(chunk))
@ -384,8 +339,8 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
            finally:
                await end_trace()

-        # Wrap the generator to preserve context across iterations
-        wrapped_gen = preserve_headers_context_async_generator(gen())
+        wrapped_gen = preserve_contexts_async_generator(gen(), [CURRENT_TRACE_CONTEXT, PROVIDER_DATA_VAR])
+
        mock_response = httpx.Response(
            status_code=httpx.codes.OK,
            content=wrapped_gen,
@ -420,7 +375,7 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
        if not body:
            return {}

-        func, _ = self._find_matching_endpoint(method, path)
+        func, _, _ = find_matching_endpoint(method, path, self.endpoint_impls)
        sig = inspect.signature(func)

        # Strip NOT_GIVENs to use the defaults in signature
--- a/llama_stack/distribution/providers.py
+++ b/llama_stack/distribution/providers.py
@ -0,0 +1,66 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+
+from pydantic import BaseModel
+
+from llama_stack.apis.providers import ListProvidersResponse, ProviderInfo, Providers
+from llama_stack.log import get_logger
+
+from .datatypes import StackRunConfig
+from .stack import redact_sensitive_fields
+
+logger = get_logger(name=__name__, category="core")
+
+
+class ProviderImplConfig(BaseModel):
+    # Full stack run configuration; ProviderImpl.list_providers reads its `providers` mapping.
+    run_config: StackRunConfig
+
+
+async def get_provider_impl(config, deps):
+    """Build a ProviderImpl from `config` and `deps`, initialize it, and return it."""
+    provider_impl = ProviderImpl(config, deps)
+    await provider_impl.initialize()
+    return provider_impl
+
+
+class ProviderImpl(Providers):
+    """Providers API implementation that answers from the stack run config."""
+
+    def __init__(self, config, deps):
+        self.deps = deps
+        self.config = config
+
+    async def initialize(self) -> None:
+        """Nothing to set up."""
+        return None
+
+    async def shutdown(self) -> None:
+        """Log the shutdown; there is nothing to release."""
+        logger.debug("ProviderImpl.shutdown")
+
+    async def list_providers(self) -> ListProvidersResponse:
+        """List every configured provider, using a run config passed through redact_sensitive_fields."""
+        redacted = redact_sensitive_fields(self.config.run_config.model_dump())
+        safe_config = StackRunConfig(**redacted)
+        infos = [
+            ProviderInfo(
+                api=api_name,
+                provider_id=provider.provider_id,
+                provider_type=provider.provider_type,
+                config=provider.config,
+            )
+            for api_name, api_providers in safe_config.providers.items()
+            for provider in api_providers
+        ]
+        return ListProvidersResponse(data=infos)
+
+    async def inspect_provider(self, provider_id: str) -> ProviderInfo:
+        """Return the first listed provider whose provider_id matches.
+
+        Raises:
+            ValueError: if no listed provider has the given provider_id.
+        """
+        listing = await self.list_providers()
+        found = next((info for info in listing.data if info.provider_id == provider_id), None)
+        if found is None:
+            raise ValueError(f"Provider {provider_id} not found")
+        return found
--- a/llama_stack/distribution/request_headers.py
+++ b/llama_stack/distribution/request_headers.py
@ -7,59 +7,37 @@
 import contextvars
 import json
 import logging
-from typing import Any, AsyncGenerator, ContextManager, Dict, Optional, TypeVar
+from typing import Any, ContextManager, Dict, List, Optional

 from .utils.dynamic import instantiate_class_type

 log = logging.getLogger(__name__)

-# Context variable for request provider data
-_provider_data_var = contextvars.ContextVar("provider_data", default=None)
+# Context variable for request provider data and auth attributes
+PROVIDER_DATA_VAR = contextvars.ContextVar("provider_data", default=None)


 class RequestProviderDataContext(ContextManager):
    """Context manager for request provider data"""

-    def __init__(self, provider_data: Optional[Dict[str, Any]] = None):
-        self.provider_data = provider_data
+    def __init__(
+        self, provider_data: Optional[Dict[str, Any]] = None, auth_attributes: Optional[Dict[str, List[str]]] = None
+    ):
+        self.provider_data = provider_data or {}
+        if auth_attributes:
+            self.provider_data["__auth_attributes"] = auth_attributes
+
        self.token = None

    def __enter__(self):
        # Save the current value and set the new one
-        self.token = _provider_data_var.set(self.provider_data)
+        self.token = PROVIDER_DATA_VAR.set(self.provider_data)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Restore the previous value
        if self.token is not None:
-            _provider_data_var.reset(self.token)
-
-
-T = TypeVar("T")
-
-
-def preserve_headers_context_async_generator(gen: AsyncGenerator[T, None]) -> AsyncGenerator[T, None]:
-    """
-    Wraps an async generator to preserve request headers context variables across iterations.
-
-    This ensures that context variables set during generator creation are
-    available during each iteration of the generator, even if the original
-    context manager has exited.
-    """
-    # Capture the current context value right now
-    context_value = _provider_data_var.get()
-
-    async def wrapper():
-        while True:
-            # Set context before each anext() call
-            _ = _provider_data_var.set(context_value)
-            try:
-                item = await gen.__anext__()
-                yield item
-            except StopAsyncIteration:
-                break
-
-    return wrapper()
+            PROVIDER_DATA_VAR.reset(self.token)


 class NeedsRequestProviderData:
@ -72,7 +50,7 @@ class NeedsRequestProviderData:
        if not validator_class:
            raise ValueError(f"Provider {provider_type} does not have a validator")

-        val = _provider_data_var.get()
+        val = PROVIDER_DATA_VAR.get()
        if not val:
            return None

@ -107,7 +85,17 @@ def parse_request_provider_data(headers: Dict[str, str]) -> Optional[Dict[str, A
        return None


-def request_provider_data_context(headers: Dict[str, str]) -> ContextManager:
-    """Context manager that sets request provider data from headers for the duration of the context"""
+def request_provider_data_context(
+    headers: Dict[str, str], auth_attributes: Optional[Dict[str, List[str]]] = None
+) -> ContextManager:
+    """Context manager that sets request provider data from headers and auth attributes for the duration of the context"""
    provider_data = parse_request_provider_data(headers)
-    return RequestProviderDataContext(provider_data)
+    return RequestProviderDataContext(provider_data, auth_attributes)
+
+
+def get_auth_attributes() -> Optional[Dict[str, List[str]]]:
+    """Return the auth attributes stored in the current provider data, or None if there are none."""
+    current = PROVIDER_DATA_VAR.get()
+    # Falsy provider data (None or empty) carries no auth attributes.
+    return current.get("__auth_attributes") if current else None
--- a/llama_stack/distribution/resolver.py
+++ b/llama_stack/distribution/resolver.py
@ -12,12 +12,14 @@ from llama_stack.apis.benchmarks import Benchmarks
 from llama_stack.apis.datasetio import DatasetIO
 from llama_stack.apis.datasets import Datasets
 from llama_stack.apis.eval import Eval
+from llama_stack.apis.files import Files
 from llama_stack.apis.inference import Inference
 from llama_stack.apis.inspect import Inspect
 from llama_stack.apis.models import Models
 from llama_stack.apis.post_training import PostTraining
 from llama_stack.apis.preprocessing import Preprocessing
 from llama_stack.apis.preprocessing.preprocessors import Preprocessors
+from llama_stack.apis.providers import Providers as ProvidersAPI
 from llama_stack.apis.safety import Safety
 from llama_stack.apis.scoring import Scoring
 from llama_stack.apis.scoring_functions import ScoringFunctions
@ -62,6 +64,7 @@ class InvalidProviderError(Exception):

 def api_protocol_map() -> Dict[Api, Any]:
    return {
+        Api.providers: ProvidersAPI,
        Api.agents: Agents,
        Api.inference: Inference,
        Api.inspect: Inspect,
@ -80,6 +83,7 @@ def api_protocol_map() -> Dict[Api, Any]:
        Api.post_training: PostTraining,
        Api.tool_groups: ToolGroups,
        Api.tool_runtime: ToolRuntime,
+        Api.files: Files,
        Api.preprocessing: Preprocessing,
        Api.preprocessors: Preprocessors,
    }
@ -171,7 +175,9 @@ def specs_for_autorouted_apis(apis_to_serve: List[str] | Set[str]) -> Dict[str,
                    module="llama_stack.distribution.routers",
                    routing_table_api=info.routing_table_api,
                    api_dependencies=[info.routing_table_api],
-                    deps__=[info.routing_table_api.value],
+                    # Add telemetry as an optional dependency to all auto-routed providers
+                    optional_api_dependencies=[Api.telemetry],
+                    deps__=([info.routing_table_api.value, Api.telemetry.value]),
                ),
            )
        }
@ -251,6 +257,25 @@ def sort_providers_by_deps(
        )
    )

+    sorted_providers.append(
+        (
+            "providers",
+            ProviderWithSpec(
+                provider_id="__builtin__",
+                provider_type="__builtin__",
+                config={"run_config": run_config.model_dump()},
+                spec=InlineProviderSpec(
+                    api=Api.providers,
+                    provider_type="__builtin__",
+                    config_class="llama_stack.distribution.providers.ProviderImplConfig",
+                    module="llama_stack.distribution.providers",
+                    api_dependencies=apis,
+                    deps__=[x.value for x in apis],
+                ),
+            ),
+        )
+    )
+
    logger.debug(f"Resolved {len(sorted_providers)} providers")
    for api_str, provider in sorted_providers:
        logger.debug(f" {api_str} => {provider.provider_id}")
--- a/llama_stack/distribution/routers/init.py
+++ b/llama_stack/distribution/routers/init.py
@ -47,7 +47,7 @@ async def get_routing_table_impl(
    return impl


-async def get_auto_router_impl(api: Api, routing_table: RoutingTable, _deps) -> Any:
+async def get_auto_router_impl(api: Api, routing_table: RoutingTable, deps: Dict[str, Any]) -> Any:
    from .routers import (
        DatasetIORouter,
        EvalRouter,
@ -69,9 +69,17 @@ async def get_auto_router_impl(api: Api, routing_table: RoutingTable, _deps) ->
        "tool_runtime": ToolRuntimeRouter,
        "preprocessing": PreprocessingRouter,
    }
+    api_to_deps = {
+        "inference": {"telemetry": Api.telemetry},
+    }
    if api.value not in api_to_routers:
        raise ValueError(f"API {api.value} not found in router map")

-    impl = api_to_routers[api.value](routing_table)
+    api_to_dep_impl = {}
+    for dep_name, dep_api in api_to_deps.get(api.value, {}).items():
+        if dep_api in deps:
+            api_to_dep_impl[dep_name] = deps[dep_api]
+
+    impl = api_to_routers[api.value](routing_table, **api_to_dep_impl)
    await impl.initialize()
    return impl
--- a/llama_stack/distribution/routers/routers.py
+++ b/llama_stack/distribution/routers/routers.py
@ -4,22 +4,23 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from typing import Any, AsyncGenerator, Dict, List, Optional
+import time
+from typing import Any, AsyncGenerator, AsyncIterator, Dict, List, Optional, Union

 from llama_stack.apis.common.content_types import (
    URL,
    InterleavedContent,
    InterleavedContentItem,
 )
-from llama_stack.apis.datasetio import DatasetIO, PaginatedRowsResult
-from llama_stack.apis.eval import (
-    BenchmarkConfig,
-    Eval,
-    EvaluateResponse,
-    Job,
-    JobStatus,
-)
+from llama_stack.apis.common.responses import PaginatedResponse
+from llama_stack.apis.datasetio import DatasetIO
+from llama_stack.apis.datasets import DatasetPurpose, DataSource
+from llama_stack.apis.eval import BenchmarkConfig, Eval, EvaluateResponse, Job
 from llama_stack.apis.inference import (
+    ChatCompletionResponse,
+    ChatCompletionResponseEventType,
+    ChatCompletionResponseStreamChunk,
+    CompletionMessage,
    EmbeddingsResponse,
    EmbeddingTaskType,
    Inference,
@ -27,13 +28,14 @@ from llama_stack.apis.inference import (
    Message,
    ResponseFormat,
    SamplingParams,
+    StopReason,
    TextTruncation,
    ToolChoice,
    ToolConfig,
    ToolDefinition,
    ToolPromptFormat,
 )
-from llama_stack.apis.models import ModelType
+from llama_stack.apis.models import Model, ModelType
 from llama_stack.apis.preprocessing import (
    Preprocessing,
    PreprocessingDataElement,
@ -48,18 +50,22 @@ from llama_stack.apis.scoring import (
    ScoringFnParams,
 )
 from llama_stack.apis.shields import Shield
+from llama_stack.apis.telemetry import MetricEvent, MetricInResponse, Telemetry
 from llama_stack.apis.tools import (
+    ListToolDefsResponse,
    RAGDocument,
    RAGQueryConfig,
    RAGQueryResult,
    RAGToolRuntime,
-    ToolDef,
    ToolRuntime,
 )
 from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
 from llama_stack.distribution.utils.chain import execute_preprocessor_chain
 from llama_stack.log import get_logger
+from llama_stack.models.llama.llama3.chat_format import ChatFormat
+from llama_stack.models.llama.llama3.tokenizer import Tokenizer
 from llama_stack.providers.datatypes import RoutingTable
+from llama_stack.providers.utils.telemetry.tracing import get_current_span

 logger = get_logger(name=__name__, category="core")

@ -126,9 +132,14 @@ class InferenceRouter(Inference):
    def __init__(
        self,
        routing_table: RoutingTable,
+        telemetry: Optional[Telemetry] = None,
    ) -> None:
        logger.debug("Initializing InferenceRouter")
        self.routing_table = routing_table
+        self.telemetry = telemetry
+        if self.telemetry:
+            self.tokenizer = Tokenizer.get_instance()
+            self.formatter = ChatFormat(self.tokenizer)

    async def initialize(self) -> None:
        logger.debug("InferenceRouter.initialize")
@ -151,6 +162,75 @@ class InferenceRouter(Inference):
        )
        await self.routing_table.register_model(model_id, provider_model_id, provider_id, metadata, model_type)

+    def _construct_metrics(
+        self,
+        prompt_tokens: int,
+        completion_tokens: int,
+        total_tokens: int,
+        model: Model,
+    ) -> List[MetricEvent]:
+        """Build the token-usage MetricEvents for the currently active span.
+
+        One event is produced per counter ("prompt_tokens", "completion_tokens",
+        "total_tokens"), each carrying the span's trace/span ids, the unit
+        "tokens" and the model's ``model_id`` / ``provider_id`` as attributes.
+
+        Args:
+            prompt_tokens: Number of tokens in the prompt
+            completion_tokens: Number of tokens in the completion
+            total_tokens: Total number of tokens used
+            model: Model object containing model_id and provider_id
+
+        Returns:
+            The three events in the order listed above, or an empty list (after
+            logging a warning) when there is no current span.
+        """
+        active_span = get_current_span()
+        if active_span is None:
+            logger.warning("No span found for token usage metrics")
+            return []
+
+        # Insertion order of the dict fixes the order of the emitted events.
+        token_counts = {
+            "prompt_tokens": prompt_tokens,
+            "completion_tokens": completion_tokens,
+            "total_tokens": total_tokens,
+        }
+        return [
+            MetricEvent(
+                trace_id=active_span.trace_id,
+                span_id=active_span.span_id,
+                metric=counter,
+                value=count,
+                # Each event is stamped individually when it is constructed.
+                timestamp=time.time(),
+                unit="tokens",
+                attributes={
+                    "model_id": model.model_id,
+                    "provider_id": model.provider_id,
+                },
+            )
+            for counter, count in token_counts.items()
+        ]
+
+    async def _compute_and_log_token_usage(
+        self,
+        prompt_tokens: int,
+        completion_tokens: int,
+        total_tokens: int,
+        model: Model,
+    ) -> List[MetricInResponse]:
+        """Create token-usage metrics, log them if telemetry is set, and return response entries.
+
+        The events come from ``_construct_metrics``. When ``self.telemetry`` is
+        truthy each event is sent through ``log_event`` first; afterwards every
+        event is reduced to a ``MetricInResponse`` holding its name and value.
+        """
+        events = self._construct_metrics(prompt_tokens, completion_tokens, total_tokens, model)
+
+        if self.telemetry:
+            for event in events:
+                await self.telemetry.log_event(event)
+
+        in_response: List[MetricInResponse] = []
+        for event in events:
+            in_response.append(MetricInResponse(metric=event.metric, value=event.value))
+        return in_response
+
+    async def _count_tokens(
+        self,
+        messages: List[Message] | InterleavedContent,
+        tool_prompt_format: Optional[ToolPromptFormat] = None,
+    ) -> Optional[int]:
+        """Count the tokens in a dialog or in raw content using the router's chat formatter.
+
+        Args:
+            messages: A list of messages (encoded as a dialog prompt) or any
+                other content (encoded with ``encode_content``).
+            tool_prompt_format: Passed through to the dialog encoder; unused for
+                non-list content.
+
+        Returns:
+            The number of encoded tokens, 0 when the encoder yields no tokens,
+            or None when this router has no formatter to count with.
+        """
+        # The tokenizer/formatter pair is only created in __init__ when telemetry
+        # is configured, yet the completion paths call this method
+        # unconditionally. Report "unknown" (None) instead of raising
+        # AttributeError; callers already normalise with `or 0`.
+        formatter = getattr(self, "formatter", None)
+        if formatter is None:
+            return None
+
+        if isinstance(messages, list):
+            encoded = formatter.encode_dialog_prompt(messages, tool_prompt_format)
+        else:
+            encoded = formatter.encode_content(messages)
+        return len(encoded.tokens) if encoded and encoded.tokens else 0
+
    async def chat_completion(
        self,
        model_id: str,
@ -163,7 +243,7 @@ class InferenceRouter(Inference):
        stream: Optional[bool] = False,
        logprobs: Optional[LogProbConfig] = None,
        tool_config: Optional[ToolConfig] = None,
-    ) -> AsyncGenerator:
+    ) -> Union[ChatCompletionResponse, AsyncIterator[ChatCompletionResponseStreamChunk]]:
        logger.debug(
            f"InferenceRouter.chat_completion: {model_id=}, {stream=}, {messages=}, {tools=}, {tool_config=}, {response_format=}",
        )
@ -213,10 +293,52 @@ class InferenceRouter(Inference):
            tool_config=tool_config,
        )
        provider = self.routing_table.get_provider_impl(model_id)
+        prompt_tokens = await self._count_tokens(messages, tool_config.tool_prompt_format)
+
        if stream:
-            return (chunk async for chunk in await provider.chat_completion(**params))
+
+            async def stream_generator():
+                completion_text = ""
+                async for chunk in await provider.chat_completion(**params):
+                    if chunk.event.event_type == ChatCompletionResponseEventType.progress:
+                        if chunk.event.delta.type == "text":
+                            completion_text += chunk.event.delta.text
+                    if chunk.event.event_type == ChatCompletionResponseEventType.complete:
+                        completion_tokens = await self._count_tokens(
+                            [
+                                CompletionMessage(
+                                    content=completion_text,
+                                    stop_reason=StopReason.end_of_turn,
+                                )
+                            ],
+                            tool_config.tool_prompt_format,
+                        )
+                        total_tokens = (prompt_tokens or 0) + (completion_tokens or 0)
+                        metrics = await self._compute_and_log_token_usage(
+                            prompt_tokens or 0,
+                            completion_tokens or 0,
+                            total_tokens,
+                            model,
+                        )
+                        chunk.metrics = metrics if chunk.metrics is None else chunk.metrics + metrics
+                    yield chunk
+
+            return stream_generator()
        else:
-            return await provider.chat_completion(**params)
+            response = await provider.chat_completion(**params)
+            completion_tokens = await self._count_tokens(
+                [response.completion_message],
+                tool_config.tool_prompt_format,
+            )
+            total_tokens = (prompt_tokens or 0) + (completion_tokens or 0)
+            metrics = await self._compute_and_log_token_usage(
+                prompt_tokens or 0,
+                completion_tokens or 0,
+                total_tokens,
+                model,
+            )
+            response.metrics = metrics if response.metrics is None else response.metrics + metrics
+            return response

    async def completion(
        self,
@ -246,10 +368,41 @@ class InferenceRouter(Inference):
            stream=stream,
            logprobs=logprobs,
        )
+
+        prompt_tokens = await self._count_tokens(content)
+
        if stream:
-            return (chunk async for chunk in await provider.completion(**params))
+
+            async def stream_generator():
+                completion_text = ""
+                async for chunk in await provider.completion(**params):
+                    if hasattr(chunk, "delta"):
+                        completion_text += chunk.delta
+                    if hasattr(chunk, "stop_reason") and chunk.stop_reason and self.telemetry:
+                        completion_tokens = await self._count_tokens(completion_text)
+                        total_tokens = (prompt_tokens or 0) + (completion_tokens or 0)
+                        metrics = await self._compute_and_log_token_usage(
+                            prompt_tokens or 0,
+                            completion_tokens or 0,
+                            total_tokens,
+                            model,
+                        )
+                        chunk.metrics = metrics if chunk.metrics is None else chunk.metrics + metrics
+                    yield chunk
+
+            return stream_generator()
        else:
-            return await provider.completion(**params)
+            response = await provider.completion(**params)
+            completion_tokens = await self._count_tokens(response.content)
+            total_tokens = (prompt_tokens or 0) + (completion_tokens or 0)
+            metrics = await self._compute_and_log_token_usage(
+                prompt_tokens or 0,
+                completion_tokens or 0,
+                total_tokens,
+                model,
+            )
+            response.metrics = metrics if response.metrics is None else response.metrics + metrics
+            return response

    async def embeddings(
        self,
@ -330,21 +483,36 @@ class DatasetIORouter(DatasetIO):
        logger.debug("DatasetIORouter.shutdown")
        pass

-    async def get_rows_paginated(
+    async def register_dataset(
+        self,
+        purpose: DatasetPurpose,
+        source: DataSource,
+        metadata: Optional[Dict[str, Any]] = None,
+        dataset_id: Optional[str] = None,
+    ) -> None:
+        """Log the request and delegate dataset registration to the routing table.
+
+        All four arguments are forwarded unchanged as keyword arguments; the
+        routing table's result is awaited but not returned.
+        """
+        logger.debug(
+            f"DatasetIORouter.register_dataset: {purpose=} {source=} {metadata=} {dataset_id=}",
+        )
+        registration = {
+            "purpose": purpose,
+            "source": source,
+            "metadata": metadata,
+            "dataset_id": dataset_id,
+        }
+        await self.routing_table.register_dataset(**registration)
+
+    async def iterrows(
        self,
        dataset_id: str,
-        rows_in_page: int,
-        page_token: Optional[str] = None,
-        filter_condition: Optional[str] = None,
-    ) -> PaginatedRowsResult:
+        start_index: Optional[int] = None,
+        limit: Optional[int] = None,
+    ) -> PaginatedResponse:
        logger.debug(
-            f"DatasetIORouter.get_rows_paginated: {dataset_id}, rows_in_page={rows_in_page}",
+            f"DatasetIORouter.iterrows: {dataset_id}, {start_index=} {limit=}",
        )
-        return await self.routing_table.get_provider_impl(dataset_id).get_rows_paginated(
+        return await self.routing_table.get_provider_impl(dataset_id).iterrows(
            dataset_id=dataset_id,
-            rows_in_page=rows_in_page,
-            page_token=page_token,
-            filter_condition=filter_condition,
+            start_index=start_index,
+            limit=limit,
        )

    async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None:
@ -457,7 +625,7 @@ class EvalRouter(Eval):
        self,
        benchmark_id: str,
        job_id: str,
-    ) -> Optional[JobStatus]:
+    ) -> Job:
        logger.debug(f"EvalRouter.job_status: {benchmark_id}, {job_id}")
        return await self.routing_table.get_provider_impl(benchmark_id).job_status(benchmark_id, job_id)

@ -547,7 +715,7 @@ class ToolRuntimeRouter(ToolRuntime):

    async def list_runtime_tools(
        self, tool_group_id: Optional[str] = None, mcp_endpoint: Optional[URL] = None
-    ) -> List[ToolDef]:
+    ) -> ListToolDefsResponse:
        logger.debug(f"ToolRuntimeRouter.list_runtime_tools: {tool_group_id}")
        return await self.routing_table.get_provider_impl(tool_group_id).list_tools(tool_group_id, mcp_endpoint)

--- a/llama_stack/distribution/routers/routing_tables.py
+++ b/llama_stack/distribution/routers/routing_tables.py
@ -5,6 +5,7 @@
 # the root directory of this source tree.

 import logging
+import uuid
 from typing import Any, Dict, List, Optional

 from pydantic import TypeAdapter
@ -12,7 +13,16 @@ from pydantic import TypeAdapter
 from llama_stack.apis.benchmarks import Benchmark, Benchmarks, ListBenchmarksResponse
 from llama_stack.apis.common.content_types import URL
 from llama_stack.apis.common.type_system import ParamType
-from llama_stack.apis.datasets import Dataset, Datasets, ListDatasetsResponse
+from llama_stack.apis.datasets import (
+    Dataset,
+    DatasetPurpose,
+    Datasets,
+    DatasetType,
+    DataSource,
+    ListDatasetsResponse,
+    RowsDataSource,
+    URIDataSource,
+)
 from llama_stack.apis.models import ListModelsResponse, Model, Models, ModelType
 from llama_stack.apis.preprocessing.preprocessors import ListPreprocessorsResponse, Preprocessor, Preprocessors
 from llama_stack.apis.resource import ResourceType
@ -32,11 +42,23 @@ from llama_stack.apis.tools import (
    ToolHost,
 )
 from llama_stack.apis.vector_dbs import ListVectorDBsResponse, VectorDB, VectorDBs
+from llama_stack.distribution.access_control import check_access
 from llama_stack.distribution.datatypes import (
+    AccessAttributes,
+    BenchmarkWithACL,
+    DatasetWithACL,
+    ModelWithACL,
+    PreprocessorWithACL,
    RoutableObject,
    RoutableObjectWithProvider,
    RoutedProtocol,
+    ScoringFnWithACL,
+    ShieldWithACL,
+    ToolGroupWithACL,
+    ToolWithACL,
+    VectorDBWithACL,
 )
+from llama_stack.distribution.request_headers import get_auth_attributes
 from llama_stack.distribution.store import DistributionRegistry
 from llama_stack.providers.datatypes import Api, RoutingTable

@ -185,6 +207,11 @@ class CommonRoutingTableImpl(RoutingTable):
        if not obj:
            return None

+        # Check if user has permission to access this object
+        if not check_access(obj.identifier, getattr(obj, "access_attributes", None), get_auth_attributes()):
+            logger.debug(f"Access denied to {type} '{identifier}' based on attribute mismatch")
+            return None
+
        return obj

    async def unregister_object(self, obj: RoutableObjectWithProvider) -> None:
@ -201,6 +228,13 @@ class CommonRoutingTableImpl(RoutingTable):

        p = self.impls_by_provider_id[obj.provider_id]

+        # If object supports access control but no attributes set, use creator's attributes
+        if not obj.access_attributes:
+            creator_attributes = get_auth_attributes()
+            if creator_attributes:
+                obj.access_attributes = AccessAttributes(**creator_attributes)
+                logger.info(f"Setting access attributes for {obj.type} '{obj.identifier}' based on creator's identity")
+
        registered_obj = await register_object_with_provider(obj, p)
        # TODO: This needs to be fixed for all APIs once they return the registered object
        if obj.type == ResourceType.model.value:
@ -213,15 +247,28 @@ class CommonRoutingTableImpl(RoutingTable):

    async def get_all_with_type(self, type: str) -> List[RoutableObjectWithProvider]:
        objs = await self.dist_registry.get_all()
-        return [obj for obj in objs if obj.type == type]
+        filtered_objs = [obj for obj in objs if obj.type == type]
+
+        # Apply attribute-based access control filtering
+        if filtered_objs:
+            filtered_objs = [
+                obj
+                for obj in filtered_objs
+                if check_access(obj.identifier, getattr(obj, "access_attributes", None), get_auth_attributes())
+            ]
+
+        return filtered_objs


 class ModelsRoutingTable(CommonRoutingTableImpl, Models):
    async def list_models(self) -> ListModelsResponse:
        return ListModelsResponse(data=await self.get_all_with_type("model"))

-    async def get_model(self, model_id: str) -> Optional[Model]:
-        return await self.get_object_by_identifier("model", model_id)
+    async def get_model(self, model_id: str) -> Model:
+        """Return the model stored under ``model_id``.
+
+        Raises:
+            ValueError: if the lookup yields nothing (unknown id, or the object
+                was withheld by the access check in get_object_by_identifier).
+        """
+        found = await self.get_object_by_identifier("model", model_id)
+        if found is not None:
+            return found
+        raise ValueError(f"Model '{model_id}' not found")

    async def register_model(
        self,
@ -247,7 +294,7 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
            model_type = ModelType.llm
        if "embedding_dimension" not in metadata and model_type == ModelType.embedding:
            raise ValueError("Embedding model must have an embedding dimension in its metadata")
-        model = Model(
+        model = ModelWithACL(
            identifier=model_id,
            provider_resource_id=provider_model_id,
            provider_id=provider_id,
@ -268,8 +315,11 @@ class ShieldsRoutingTable(CommonRoutingTableImpl, Shields):
    async def list_shields(self) -> ListShieldsResponse:
        return ListShieldsResponse(data=await self.get_all_with_type(ResourceType.shield.value))

-    async def get_shield(self, identifier: str) -> Optional[Shield]:
-        return await self.get_object_by_identifier("shield", identifier)
+    async def get_shield(self, identifier: str) -> Shield:
+        """Return the shield stored under ``identifier``.
+
+        Raises:
+            ValueError: if the lookup yields nothing (unknown id, or the object
+                was withheld by the access check in get_object_by_identifier).
+        """
+        found = await self.get_object_by_identifier("shield", identifier)
+        if found is not None:
+            return found
+        raise ValueError(f"Shield '{identifier}' not found")

    async def register_shield(
        self,
@ -290,7 +340,7 @@ class ShieldsRoutingTable(CommonRoutingTableImpl, Shields):
                )
        if params is None:
            params = {}
-        shield = Shield(
+        shield = ShieldWithACL(
            identifier=shield_id,
            provider_resource_id=provider_shield_id,
            provider_id=provider_id,
@ -304,8 +354,11 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl, VectorDBs):
    async def list_vector_dbs(self) -> ListVectorDBsResponse:
        return ListVectorDBsResponse(data=await self.get_all_with_type("vector_db"))

-    async def get_vector_db(self, vector_db_id: str) -> Optional[VectorDB]:
-        return await self.get_object_by_identifier("vector_db", vector_db_id)
+    async def get_vector_db(self, vector_db_id: str) -> VectorDB:
+        """Return the vector DB stored under ``vector_db_id``.
+
+        Raises:
+            ValueError: if the lookup yields nothing (unknown id, or the object
+                was withheld by the access check in get_object_by_identifier).
+        """
+        found = await self.get_object_by_identifier("vector_db", vector_db_id)
+        if found is not None:
+            return found
+        raise ValueError(f"Vector DB '{vector_db_id}' not found")

    async def register_vector_db(
        self,
@ -341,7 +394,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl, VectorDBs):
            "embedding_model": embedding_model,
            "embedding_dimension": model.metadata["embedding_dimension"],
        }
-        vector_db = TypeAdapter(VectorDB).validate_python(vector_db_data)
+        vector_db = TypeAdapter(VectorDBWithACL).validate_python(vector_db_data)
        await self.register_object(vector_db)
        return vector_db

@ -356,39 +409,56 @@ class DatasetsRoutingTable(CommonRoutingTableImpl, Datasets):
    async def list_datasets(self) -> ListDatasetsResponse:
        return ListDatasetsResponse(data=await self.get_all_with_type(ResourceType.dataset.value))

-    async def get_dataset(self, dataset_id: str) -> Optional[Dataset]:
-        return await self.get_object_by_identifier("dataset", dataset_id)
+    async def get_dataset(self, dataset_id: str) -> Dataset:
+        """Return the dataset stored under ``dataset_id``.
+
+        Raises:
+            ValueError: if the lookup yields nothing (unknown id, or the object
+                was withheld by the access check in get_object_by_identifier).
+        """
+        found = await self.get_object_by_identifier("dataset", dataset_id)
+        if found is not None:
+            return found
+        raise ValueError(f"Dataset '{dataset_id}' not found")

    async def register_dataset(
        self,
-        dataset_id: str,
-        dataset_schema: Dict[str, ParamType],
-        url: URL,
-        provider_dataset_id: Optional[str] = None,
-        provider_id: Optional[str] = None,
+        purpose: DatasetPurpose,
+        source: DataSource,
        metadata: Optional[Dict[str, Any]] = None,
-    ) -> None:
-        if provider_dataset_id is None:
-            provider_dataset_id = dataset_id
-        if provider_id is None:
-            # If provider_id not specified, use the only provider if it supports this dataset
-            if len(self.impls_by_provider_id) == 1:
-                provider_id = list(self.impls_by_provider_id.keys())[0]
+        dataset_id: Optional[str] = None,
+    ) -> Dataset:
+        if isinstance(source, dict):
+            if source["type"] == "uri":
+                source = URIDataSource.parse_obj(source)
+            elif source["type"] == "rows":
+                source = RowsDataSource.parse_obj(source)
+
+        if not dataset_id:
+            dataset_id = f"dataset-{str(uuid.uuid4())}"
+
+        provider_dataset_id = dataset_id
+
+        # infer provider from source
+        if source.type == DatasetType.rows.value:
+            provider_id = "localfs"
+        elif source.type == DatasetType.uri.value:
+            # infer provider from uri
+            if source.uri.startswith("huggingface"):
+                provider_id = "huggingface"
            else:
-                raise ValueError(
-                    f"No provider specified and multiple providers available. Please specify a provider_id. Available providers: {self.impls_by_provider_id.keys()}"
-                )
+                provider_id = "localfs"
+        else:
+            raise ValueError(f"Unknown data source type: {source.type}")
+
        if metadata is None:
            metadata = {}
-        dataset = Dataset(
+
+        dataset = DatasetWithACL(
            identifier=dataset_id,
            provider_resource_id=provider_dataset_id,
            provider_id=provider_id,
-            dataset_schema=dataset_schema,
-            url=url,
+            purpose=purpose,
+            source=source,
            metadata=metadata,
        )
+
        await self.register_object(dataset)
+        return dataset

    async def unregister_dataset(self, dataset_id: str) -> None:
        dataset = await self.get_dataset(dataset_id)
@ -401,8 +471,11 @@ class ScoringFunctionsRoutingTable(CommonRoutingTableImpl, ScoringFunctions):
    async def list_scoring_functions(self) -> ListScoringFunctionsResponse:
        return ListScoringFunctionsResponse(data=await self.get_all_with_type(ResourceType.scoring_function.value))

-    async def get_scoring_function(self, scoring_fn_id: str) -> Optional[ScoringFn]:
-        return await self.get_object_by_identifier("scoring_function", scoring_fn_id)
+    async def get_scoring_function(self, scoring_fn_id: str) -> ScoringFn:
+        """Return the scoring function stored under ``scoring_fn_id``.
+
+        Raises:
+            ValueError: if the lookup yields nothing (unknown id, or the object
+                was withheld by the access check in get_object_by_identifier).
+        """
+        found = await self.get_object_by_identifier("scoring_function", scoring_fn_id)
+        if found is not None:
+            return found
+        raise ValueError(f"Scoring function '{scoring_fn_id}' not found")

    async def register_scoring_function(
        self,
@ -422,7 +495,7 @@ class ScoringFunctionsRoutingTable(CommonRoutingTableImpl, ScoringFunctions):
                raise ValueError(
                    "No provider specified and multiple providers available. Please specify a provider_id."
                )
-        scoring_fn = ScoringFn(
+        scoring_fn = ScoringFnWithACL(
            identifier=scoring_fn_id,
            description=description,
            return_type=return_type,
@ -438,8 +511,11 @@ class BenchmarksRoutingTable(CommonRoutingTableImpl, Benchmarks):
    async def list_benchmarks(self) -> ListBenchmarksResponse:
        return ListBenchmarksResponse(data=await self.get_all_with_type("benchmark"))

-    async def get_benchmark(self, benchmark_id: str) -> Optional[Benchmark]:
-        return await self.get_object_by_identifier("benchmark", benchmark_id)
+    async def get_benchmark(self, benchmark_id: str) -> Benchmark:
+        """Return the benchmark stored under ``benchmark_id``.
+
+        Raises:
+            ValueError: if the lookup yields nothing (unknown id, or the object
+                was withheld by the access check in get_object_by_identifier).
+        """
+        found = await self.get_object_by_identifier("benchmark", benchmark_id)
+        if found is not None:
+            return found
+        raise ValueError(f"Benchmark '{benchmark_id}' not found")

    async def register_benchmark(
        self,
@ -461,7 +537,7 @@ class BenchmarksRoutingTable(CommonRoutingTableImpl, Benchmarks):
                )
        if provider_benchmark_id is None:
            provider_benchmark_id = benchmark_id
-        benchmark = Benchmark(
+        benchmark = BenchmarkWithACL(
            identifier=benchmark_id,
            dataset_id=dataset_id,
            scoring_functions=scoring_functions,
@ -483,7 +559,10 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups):
        return ListToolGroupsResponse(data=await self.get_all_with_type("tool_group"))

    async def get_tool_group(self, toolgroup_id: str) -> ToolGroup:
-        return await self.get_object_by_identifier("tool_group", toolgroup_id)
+        tool_group = await self.get_object_by_identifier("tool_group", toolgroup_id)
+        if tool_group is None:
+            raise ValueError(f"Tool group '{toolgroup_id}' not found")
+        return tool_group

    async def get_tool(self, tool_name: str) -> Tool:
        return await self.get_object_by_identifier("tool", tool_name)
@ -499,9 +578,9 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups):
        tool_defs = await self.impls_by_provider_id[provider_id].list_runtime_tools(toolgroup_id, mcp_endpoint)
        tool_host = ToolHost.model_context_protocol if mcp_endpoint else ToolHost.distribution

-        for tool_def in tool_defs:
+        for tool_def in tool_defs.data:
            tools.append(
-                Tool(
+                ToolWithACL(
                    identifier=tool_def.name,
                    toolgroup_id=toolgroup_id,
                    description=tool_def.description or "",
@ -526,7 +605,7 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups):
            await self.register_object(tool)

        await self.dist_registry.register(
-            ToolGroup(
+            ToolGroupWithACL(
                identifier=toolgroup_id,
                provider_id=provider_id,
                provider_resource_id=toolgroup_id,
@ -539,7 +618,7 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups):
        tool_group = await self.get_tool_group(toolgroup_id)
        if tool_group is None:
            raise ValueError(f"Tool group {toolgroup_id} not found")
-        tools = await self.list_tools(toolgroup_id).data
+        tools = (await self.list_tools(toolgroup_id)).data
        for tool in tools:
            await self.unregister_object(tool)
        await self.unregister_object(tool_group)
@ -552,8 +631,11 @@ class PreprocessorsRoutingTable(CommonRoutingTableImpl, Preprocessors):
    async def list_preprocessors(self) -> ListPreprocessorsResponse:
        return ListPreprocessorsResponse(data=await self.get_all_with_type(ResourceType.preprocessor.value))

-    async def get_preprocessor(self, preprocessor_id: str) -> Optional[Preprocessor]:
-        return await self.get_object_by_identifier("preprocessor", preprocessor_id)
+    async def get_preprocessor(self, preprocessor_id: str) -> Preprocessor:
+        """Return the preprocessor stored under ``preprocessor_id``.
+
+        Raises:
+            ValueError: if the lookup yields nothing (unknown id, or the object
+                was withheld by the access check in get_object_by_identifier).
+        """
+        found = await self.get_object_by_identifier("preprocessor", preprocessor_id)
+        if found is not None:
+            return found
+        raise ValueError(f"Preprocessor '{preprocessor_id}' not found")

    async def register_preprocessor(
        self,
@ -571,7 +653,7 @@ class PreprocessorsRoutingTable(CommonRoutingTableImpl, Preprocessors):
                raise ValueError(
                    "No provider specified and multiple providers available. Please specify a provider_id."
                )
-        preprocessor = Preprocessor(
+        preprocessor = PreprocessorWithACL(
            identifier=preprocessor_id,
            provider_resource_id=provider_preprocessor_id,
            provider_id=provider_id,
--- a/llama_stack/distribution/server/auth.py
+++ b/llama_stack/distribution/server/auth.py
@ -0,0 +1,203 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import json
+from typing import Dict, List, Optional
+from urllib.parse import parse_qs
+
+import httpx
+from pydantic import BaseModel, Field
+
+from llama_stack.distribution.datatypes import AccessAttributes
+from llama_stack.log import get_logger
+
+logger = get_logger(name=__name__, category="auth")
+
+
+class AuthRequestContext(BaseModel):
+    # Describes the incoming HTTP request; embedded in AuthRequest.request and
+    # serialized into the body posted to the auth endpoint.
+
+    # Request path taken from the ASGI scope.
+    path: str = Field(description="The path of the request being authenticated")
+
+    # The middleware removes the "authorization" entry before filling this in.
+    headers: Dict[str, str] = Field(description="HTTP headers from the original request (excluding Authorization)")
+
+    # Produced by urllib.parse.parse_qs, hence one list of values per key.
+    params: Dict[str, List[str]] = Field(
+        description="Query parameters from the original request, parsed as dictionary of lists"
+    )
+
+
+class AuthRequest(BaseModel):
+    # Payload posted (as JSON via model_dump) to the external auth endpoint.
+
+    # The text following the "Bearer " prefix of the Authorization header.
+    api_key: str = Field(description="The API key extracted from the Authorization header")
+
+    # Path, headers and query parameters of the request being checked.
+    request: AuthRequestContext = Field(description="Context information about the request being authenticated")
+
+
+class AuthResponse(BaseModel):
+    """The format of the authentication response from the auth endpoint."""
+
+    # Optional: defaults to None when the auth endpoint sends no attributes.
+    # The middleware then falls back to a namespace named after the API key.
+    access_attributes: Optional[AccessAttributes] = Field(
+        default=None,
+        description="""
+        Structured user attributes for attribute-based access control.
+
+        These attributes determine which resources the user can access.
+        The model provides standard categories like "roles", "teams", "projects", and "namespaces".
+        Each attribute category contains a list of values that the user has for that category.
+        During access control checks, these values are compared against resource requirements.
+
+        Example with standard categories:
+        ```json
+        {
+            "roles": ["admin", "data-scientist"],
+            "teams": ["ml-team"],
+            "projects": ["llama-3"],
+            "namespaces": ["research"]
+        }
+        ```
+        """,
+    )
+
+    # Optional free-form text; defaults to None.
+    message: Optional[str] = Field(
+        default=None, description="Optional message providing additional context about the authentication result."
+    )
+
+
+class AuthenticationMiddleware:
+    """Middleware that authenticates requests using an external auth endpoint.
+
+    This middleware:
+    1. Extracts the Bearer token from the Authorization header
+    2. Sends it to the configured auth endpoint along with request details
+    3. Validates the response and extracts user attributes
+    4. Makes these attributes available to the route handlers for access control
+
+    Authentication Request Format:
+    ```json
+    {
+        "api_key": "the-api-key-extracted-from-auth-header",
+        "request": {
+            "path": "/models/list",
+            "headers": {
+                "content-type": "application/json",
+                "user-agent": "..."
+                // All headers except Authorization
+            },
+            "params": {
+                "limit": ["100"],
+                "offset": ["0"]
+                // Query parameters as key -> list of values
+            }
+        }
+    }
+    ```
+
+    Expected Auth Endpoint Response Format:
+    ```json
+    {
+        "access_attributes": {    // Structured attribute format
+            "roles": ["admin", "user"],
+            "teams": ["ml-team", "nlp-team"],
+            "projects": ["llama-3", "project-x"],
+            "namespaces": ["research"]
+        },
+        "message": "Optional message about auth result"
+    }
+    ```
+
+    Attribute-Based Access Control:
+    The attributes returned by the auth endpoint are used to determine which
+    resources the user can access. Resources can specify required attributes
+    using the access_attributes field. For a user to access a resource:
+
+    1. All attribute categories specified in the resource must be present in the user's attributes
+    2. For each category, the user must have at least one matching value
+
+    If the auth endpoint doesn't return any attributes, the user's attributes
+    default to a single "namespaces" entry containing their API key.
+
+    Any failure (missing/invalid header, non-200 from the auth endpoint,
+    unparsable response, timeout or other error) is answered with a 401 JSON
+    error and the wrapped app is not called.
+    """
+
+    def __init__(self, app, auth_endpoint):
+        """Store the wrapped ASGI app and the URL of the external auth endpoint."""
+        self.app = app
+        self.auth_endpoint = auth_endpoint
+
+    @staticmethod
+    def _decode(raw: bytes) -> str:
+        """Decode client-supplied bytes as UTF-8 without ever raising.
+
+        Header and query-string bytes are controlled by the client; undecodable
+        bytes are replaced so a malformed request cannot escape as an unhandled
+        UnicodeDecodeError. Valid UTF-8 decodes exactly as before.
+        """
+        return raw.decode("utf-8", errors="replace")
+
+    async def __call__(self, scope, receive, send):
+        """ASGI entry point: authenticate HTTP requests, pass other scopes through.
+
+        On success the user's attributes are stored in ``scope["user_attributes"]``
+        before the wrapped app is invoked.
+        """
+        if scope["type"] == "http":
+            headers = dict(scope.get("headers", []))
+            auth_header = self._decode(headers.get(b"authorization", b""))
+
+            # Auth scheme names are case-insensitive, and an empty token must
+            # never be forwarded as a credential.
+            scheme, _, api_key = auth_header.partition(" ")
+            if scheme.lower() != "bearer" or not api_key.strip():
+                return await self._send_auth_error(send, "Missing or invalid Authorization header")
+
+            path = scope.get("path", "")
+            request_headers = {self._decode(k): self._decode(v) for k, v in headers.items()}
+
+            # Remove sensitive headers
+            request_headers.pop("authorization", None)
+
+            query_string = self._decode(scope.get("query_string", b""))
+            params = parse_qs(query_string)
+
+            # Build the auth request model
+            auth_request = AuthRequest(
+                api_key=api_key,
+                request=AuthRequestContext(
+                    path=path,
+                    headers=request_headers,
+                    params=params,
+                ),
+            )
+
+            # Validate with authentication endpoint
+            try:
+                async with httpx.AsyncClient() as client:
+                    response = await client.post(
+                        self.auth_endpoint,
+                        json=auth_request.model_dump(),
+                        timeout=10.0,  # Add a reasonable timeout
+                    )
+                    if response.status_code != 200:
+                        logger.warning(f"Authentication failed: {response.status_code}")
+                        return await self._send_auth_error(send, "Authentication failed")
+
+                    # Parse and validate the auth response
+                    try:
+                        response_data = response.json()
+                        auth_response = AuthResponse(**response_data)
+
+                        # Store attributes in request scope for access control
+                        if auth_response.access_attributes:
+                            user_attributes = auth_response.access_attributes.model_dump(exclude_none=True)
+                        else:
+                            logger.warning("No access attributes, setting namespace to api_key by default")
+                            user_attributes = {
+                                "namespaces": [api_key],
+                            }
+
+                        scope["user_attributes"] = user_attributes
+                        logger.debug(f"Authentication successful: {len(user_attributes)} attributes")
+                    except Exception:
+                        logger.exception("Error parsing authentication response")
+                        return await self._send_auth_error(send, "Invalid authentication response format")
+            except httpx.TimeoutException:
+                logger.exception("Authentication request timed out")
+                return await self._send_auth_error(send, "Authentication service timeout")
+            except Exception:
+                # Boundary handler: any other failure talking to the auth
+                # service must deny access rather than propagate.
+                logger.exception("Error during authentication")
+                return await self._send_auth_error(send, "Authentication service error")
+
+        return await self.app(scope, receive, send)
+
+    async def _send_auth_error(self, send, message):
+        """Send a complete 401 response whose JSON body is {"error": {"message": message}}."""
+        await send(
+            {
+                "type": "http.response.start",
+                "status": 401,
+                "headers": [[b"content-type", b"application/json"]],
+            }
+        )
+        error_msg = json.dumps({"error": {"message": message}}).encode()
+        await send({"type": "http.response.body", "body": error_msg})
--- a/llama_stack/distribution/server/endpoints.py
+++ b/llama_stack/distribution/server/endpoints.py
@ -5,6 +5,7 @@
 # the root directory of this source tree.

 import inspect
+import re
 from typing import Dict, List

 from pydantic import BaseModel
@ -19,6 +20,7 @@ class ApiEndpoint(BaseModel):
    route: str
    method: str
    name: str
+    descriptive_name: str | None = None


 def toolgroup_protocol_map():
@ -58,8 +60,69 @@ def get_all_api_endpoints() -> Dict[Api, List[ApiEndpoint]]:
                method = "delete"
            else:
                method = "post"
-            endpoints.append(ApiEndpoint(route=route, method=method, name=name))
+            endpoints.append(
+                ApiEndpoint(route=route, method=method, name=name, descriptive_name=webmethod.descriptive_name)
+            )

        apis[api] = endpoints

    return apis
+
+
+def initialize_endpoint_impls(impls):
+    """Build a lookup table from HTTP method and route regex to the implementing callable.
+
+    Args:
+        impls: Mapping from API to its implementation object. APIs missing from the
+            mapping are skipped.
+
+    Returns:
+        A dict shaped like ``{method: {route_regex: (func, descriptive_name)}}``, where
+        ``descriptive_name`` falls back to the raw route when the endpoint defines none.
+    """
+    endpoints = get_all_api_endpoints()
+    endpoint_impls = {}
+
+    def _convert_path_to_regex(path: str) -> str:
+        # Convert {param} to named capture groups
+        # handle {param:path} as well which allows for forward slashes in the param value.
+        # The ":path" suffix is captured in its own group: the whole match ends with "}",
+        # so checking m.group(0).endswith(":path") can never succeed.
+        pattern = re.sub(
+            r"{(\w+)(:path)?}",
+            lambda m: f"(?P<{m.group(1)}>{'.+' if m.group(2) else '[^/]+'})",
+            path,
+        )
+
+        return f"^{pattern}$"
+
+    for api, api_endpoints in endpoints.items():
+        if api not in impls:
+            continue
+        impl = impls[api]
+        for endpoint in api_endpoints:
+            func = getattr(impl, endpoint.name)
+            endpoint_impls.setdefault(endpoint.method, {})[_convert_path_to_regex(endpoint.route)] = (
+                func,
+                endpoint.descriptive_name or endpoint.route,
+            )
+
+    return endpoint_impls
+
+
+def find_matching_endpoint(method, path, endpoint_impls):
+    """Look up the endpoint implementation that serves a given method and path.
+
+    Args:
+        method: HTTP method (GET, POST, etc.); compared case-insensitively
+        path: URL path to match against the registered route regexes
+        endpoint_impls: A dictionary of endpoint implementations, keyed by lower-case
+            method and then by route regex
+
+    Returns:
+        A tuple of (endpoint_function, path_params, descriptive_name), where path_params
+        holds the named groups captured from the path
+
+    Raises:
+        ValueError: If no matching endpoint is found
+    """
+    candidates = endpoint_impls.get(method.lower())
+    if candidates:
+        # The first route whose regex matches wins
+        for route_regex, (handler, label) in candidates.items():
+            matched = re.match(route_regex, path)
+            if matched is not None:
+                return handler, matched.groupdict(), label
+
+    raise ValueError(f"No endpoint found for {path}")
--- a/llama_stack/distribution/server/server.py
+++ b/llama_stack/distribution/server/server.py
@ -15,7 +15,7 @@ import warnings
 from contextlib import asynccontextmanager
 from importlib.metadata import version as parse_version
 from pathlib import Path
-from typing import Any, List, Union
+from typing import Any, List, Optional, Union

 import yaml
 from fastapi import Body, FastAPI, HTTPException, Request
@ -25,19 +25,24 @@ from fastapi.responses import JSONResponse, StreamingResponse
 from pydantic import BaseModel, ValidationError
 from typing_extensions import Annotated

-from llama_stack.distribution.datatypes import StackRunConfig
+from llama_stack.distribution.datatypes import LoggingConfig, StackRunConfig
 from llama_stack.distribution.distribution import builtin_automatically_routed_apis
 from llama_stack.distribution.request_headers import (
-    preserve_headers_context_async_generator,
+    PROVIDER_DATA_VAR,
    request_provider_data_context,
 )
 from llama_stack.distribution.resolver import InvalidProviderError
+from llama_stack.distribution.server.endpoints import (
+    find_matching_endpoint,
+    initialize_endpoint_impls,
+)
 from llama_stack.distribution.stack import (
    construct_stack,
    redact_sensitive_fields,
    replace_env_vars,
    validate_env_pair,
 )
+from llama_stack.distribution.utils.context import preserve_contexts_async_generator
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import Api
 from llama_stack.providers.inline.telemetry.meta_reference.config import TelemetryConfig
@ -45,11 +50,13 @@ from llama_stack.providers.inline.telemetry.meta_reference.telemetry import (
    TelemetryAdapter,
 )
 from llama_stack.providers.utils.telemetry.tracing import (
+    CURRENT_TRACE_CONTEXT,
    end_trace,
    setup_logger,
    start_trace,
 )

+from .auth import AuthenticationMiddleware
 from .endpoints import get_all_api_endpoints

 REPO_ROOT = Path(__file__).parent.parent.parent.parent
@ -176,13 +183,18 @@ async def sse_generator(event_gen):

 def create_dynamic_typed_route(func: Any, method: str, route: str):
    async def endpoint(request: Request, **kwargs):
-        # Use context manager for request provider data
-        with request_provider_data_context(request.headers):
+        # Get auth attributes from the request scope
+        user_attributes = request.scope.get("user_attributes", {})
+
+        # Use context manager with both provider data and auth attributes
+        with request_provider_data_context(request.headers, user_attributes):
            is_streaming = is_streaming_request(func.__name__, request, **kwargs)

            try:
                if is_streaming:
-                    gen = preserve_headers_context_async_generator(sse_generator(func(**kwargs)))
+                    gen = preserve_contexts_async_generator(
+                        sse_generator(func(**kwargs)), [CURRENT_TRACE_CONTEXT, PROVIDER_DATA_VAR]
+                    )
                    return StreamingResponse(gen, media_type="text/event-stream")
                else:
                    value = func(**kwargs)
@ -214,14 +226,30 @@ def create_dynamic_typed_route(func: Any, method: str, route: str):


 class TracingMiddleware:
+    """ASGI middleware that wraps every non-lifespan request in a trace.
+
+    The trace is named after the matched endpoint's descriptive name (or route) and its id
+    is returned to the client in an ``x-trace-id`` response header.
+    """
+
-    def __init__(self, app):
+    def __init__(self, app, impls):
        self.app = app
+        self.impls = impls

    async def __call__(self, scope, receive, send):
-        path = scope.get("path", "")
-        await start_trace(path, {"__location__": "server"})
-        try:
+        if scope.get("type") == "lifespan":
            return await self.app(scope, receive, send)
+
+        path = scope.get("path", "")
+        # Built lazily on the first request and cached on the instance
+        if not hasattr(self, "endpoint_impls"):
+            self.endpoint_impls = initialize_endpoint_impls(self.impls)
+        try:
+            _, _, trace_path = find_matching_endpoint(scope.get("method", "GET"), path, self.endpoint_impls)
+        except ValueError:
+            # Not a registered API route (docs pages, unknown URLs, ...): name the trace after
+            # the raw path instead of failing here, so the app can still answer the request.
+            trace_path = path
+
+        trace_context = await start_trace(trace_path, {"__location__": "server", "raw_path": path})
+
+        async def send_with_trace_id(message):
+            # Attach the trace id to the response head so clients can correlate requests
+            if message["type"] == "http.response.start":
+                headers = message.get("headers", [])
+                headers.append([b"x-trace-id", str(trace_context.trace_id).encode()])
+                message["headers"] = headers
+            await send(message)
+
+        try:
+            return await self.app(scope, receive, send_with_trace_id)
        finally:
            await end_trace()

@ -266,11 +294,17 @@ class ClientVersionMiddleware:
        return await self.app(scope, receive, send)


-def main():
+def main(args: Optional[argparse.Namespace] = None):
    """Start the LlamaStack server."""
    parser = argparse.ArgumentParser(description="Start the LlamaStack server.")
    parser.add_argument(
        "--yaml-config",
+        dest="config",
+        help="(Deprecated) Path to YAML configuration file - use --config instead",
+    )
+    parser.add_argument(
+        "--config",
+        dest="config",
        help="Path to YAML configuration file",
    )
    parser.add_argument(
@ -300,45 +334,69 @@ def main():
        required="--tls-keyfile" in sys.argv,
    )

-    args = parser.parse_args()
+    # Determine whether the server args are being passed by the "run" command, if this is the case
+    # the args will be passed as a Namespace object to the main function, otherwise they will be
+    # parsed from the command line
+    if args is None:
+        args = parser.parse_args()

-    if args.env:
-        for env_pair in args.env:
-            try:
-                key, value = validate_env_pair(env_pair)
-                logger.info(f"Setting CLI environment variable {key} => {value}")
-                os.environ[key] = value
-            except ValueError as e:
-                logger.error(f"Error: {str(e)}")
-                sys.exit(1)
+    # Check for deprecated argument usage
+    if "--yaml-config" in sys.argv:
+        warnings.warn(
+            "The '--yaml-config' argument is deprecated and will be removed in a future version. Use '--config' instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )

-    if args.yaml_config:
+    log_line = ""
+    if args.config:
        # if the user provided a config file, use it, even if template was specified
-        config_file = Path(args.yaml_config)
+        config_file = Path(args.config)
        if not config_file.exists():
            raise ValueError(f"Config file {config_file} does not exist")
-        logger.info(f"Using config file: {config_file}")
+        log_line = f"Using config file: {config_file}"
    elif args.template:
        config_file = Path(REPO_ROOT) / "llama_stack" / "templates" / args.template / "run.yaml"
        if not config_file.exists():
            raise ValueError(f"Template {args.template} does not exist")
-        logger.info(f"Using template {args.template} config file: {config_file}")
+        log_line = f"Using template {args.template} config file: {config_file}"
    else:
        raise ValueError("Either --yaml-config or --template must be provided")

+    logger_config = None
    with open(config_file, "r") as fp:
-        config = replace_env_vars(yaml.safe_load(fp))
+        config_contents = yaml.safe_load(fp)
+        if isinstance(config_contents, dict) and (cfg := config_contents.get("logging_config")):
+            logger_config = LoggingConfig(**cfg)
+        logger = get_logger(name=__name__, category="server", config=logger_config)
+        if args.env:
+            for env_pair in args.env:
+                try:
+                    key, value = validate_env_pair(env_pair)
+                    logger.info(f"Setting CLI environment variable {key} => {value}")
+                    os.environ[key] = value
+                except ValueError as e:
+                    logger.error(f"Error: {str(e)}")
+                    sys.exit(1)
+        config = replace_env_vars(config_contents)
        config = StackRunConfig(**config)

+    # now that the logger is initialized, print the line about which type of config we are using.
+    logger.info(log_line)
+
    logger.info("Run configuration:")
    safe_config = redact_sensitive_fields(config.model_dump())
    logger.info(yaml.dump(safe_config, indent=2))

    app = FastAPI(lifespan=lifespan)
-    app.add_middleware(TracingMiddleware)
    if not os.environ.get("LLAMA_STACK_DISABLE_VERSION_CHECK"):
        app.add_middleware(ClientVersionMiddleware)

+    # Add authentication middleware if configured
+    if config.server.auth and config.server.auth.endpoint:
+        logger.info(f"Enabling authentication with endpoint: {config.server.auth.endpoint}")
+        app.add_middleware(AuthenticationMiddleware, auth_endpoint=config.server.auth.endpoint)
+
    try:
        impls = asyncio.run(construct_stack(config))
    except InvalidProviderError as e:
@ -348,7 +406,7 @@ def main():
    if Api.telemetry in impls:
        setup_logger(impls[Api.telemetry])
    else:
-        setup_logger(TelemetryAdapter(TelemetryConfig()))
+        setup_logger(TelemetryAdapter(TelemetryConfig(), {}))

    all_endpoints = get_all_api_endpoints()

@ -364,6 +422,7 @@ def main():
        apis_to_serve.add(inf.routing_table_api.value)

    apis_to_serve.add("inspect")
+    apis_to_serve.add("providers")
    for api_str in apis_to_serve:
        api = Api(api_str)

@ -393,6 +452,7 @@ def main():
    app.exception_handler(Exception)(global_exception_handler)

    app.__llama_stack_impls__ = impls
+    app.add_middleware(TracingMiddleware, impls=impls)

    import uvicorn

@ -422,6 +482,7 @@ def main():
        "host": listen_host,
        "port": port,
        "lifespan": "on",
+        "log_level": logger.getEffectiveLevel(),
    }
    if ssl_config:
        uvicorn_config.update(ssl_config)
--- a/llama_stack/distribution/stack.py
+++ b/llama_stack/distribution/stack.py
@ -25,6 +25,7 @@ from llama_stack.apis.models import Models
 from llama_stack.apis.post_training import PostTraining
 from llama_stack.apis.preprocessing import Preprocessing
 from llama_stack.apis.preprocessing.preprocessors import Preprocessors
+from llama_stack.apis.providers import Providers
 from llama_stack.apis.safety import Safety
 from llama_stack.apis.scoring import Scoring
 from llama_stack.apis.scoring_functions import ScoringFunctions
@ -46,6 +47,7 @@ logger = get_logger(name=__name__, category="core")


 class LlamaStack(
+    Providers,
    VectorDBs,
    Inference,
    BatchInference,
--- a/llama_stack/distribution/start_stack.sh
+++ b/llama_stack/distribution/start_stack.sh
@ -13,6 +13,7 @@ LLAMA_CHECKPOINT_DIR=${LLAMA_CHECKPOINT_DIR:-}
 LLAMA_STACK_DIR=${LLAMA_STACK_DIR:-}
 TEST_PYPI_VERSION=${TEST_PYPI_VERSION:-}
 PYPI_VERSION=${PYPI_VERSION:-}
+VIRTUAL_ENV=${VIRTUAL_ENV:-}

 set -euo pipefail

@ -69,22 +70,25 @@ while [[ $# -gt 0 ]]; do
    ;;
  esac
 done
-
 PYTHON_BINARY="python"
 case "$env_type" in
  "venv")
-    # Activate virtual environment
-    if [ ! -d "$env_path_or_name" ]; then
-        echo -e "${RED}Error: Virtual environment not found at $env_path_or_name${NC}" >&2
-        exit 1
-    fi
+    # Two separate tests: "&&" cannot appear inside a single [ ... ] expression
+    if [ -n "$VIRTUAL_ENV" ] && [ "$VIRTUAL_ENV" == "$env_path_or_name" ]; then
+        echo -e "${GREEN}Virtual environment already activated${NC}" >&2
+    else
+        # Activate virtual environment
+        if [ ! -d "$env_path_or_name" ]; then
+            echo -e "${RED}Error: Virtual environment not found at $env_path_or_name${NC}" >&2
+            exit 1
+        fi

-    if [ ! -f "$env_path_or_name/bin/activate" ]; then
-        echo -e "${RED}Error: Virtual environment activate binary not found at $env_path_or_name/bin/activate" >&2
-        exit 1
-    fi
+        if [ ! -f "$env_path_or_name/bin/activate" ]; then
+            echo -e "${RED}Error: Virtual environment activate binary not found at $env_path_or_name/bin/activate${NC}" >&2
+            exit 1
+        fi

-    source "$env_path_or_name/bin/activate"
+        source "$env_path_or_name/bin/activate"
+    fi
    ;;
  "conda")
    if ! is_command_available conda; then
--- a/llama_stack/distribution/store/registry.py
+++ b/llama_stack/distribution/store/registry.py
@ -12,9 +12,12 @@ import pydantic

 from llama_stack.distribution.datatypes import KVStoreConfig, RoutableObjectWithProvider
 from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR
+from llama_stack.log import get_logger
 from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl
 from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig

+logger = get_logger(__name__, category="core")
+

 class DistributionRegistry(Protocol):
    async def get_all(self) -> List[RoutableObjectWithProvider]: ...
@ -47,8 +50,13 @@ def _parse_registry_values(values: List[str]) -> List[RoutableObjectWithProvider
    """Utility function to parse registry values into RoutableObjectWithProvider objects."""
    all_objects = []
    for value in values:
-        obj = pydantic.TypeAdapter(RoutableObjectWithProvider).validate_json(value)
-        all_objects.append(obj)
+        try:
+            obj = pydantic.TypeAdapter(RoutableObjectWithProvider).validate_json(value)
+            all_objects.append(obj)
+        except pydantic.ValidationError as e:
+            logger.error(f"Error parsing registry value, raw value: {value}. Error: {e}")
+            continue
+
    return all_objects


@ -73,7 +81,11 @@ class DiskDistributionRegistry(DistributionRegistry):
        if not json_str:
            return None

-        return pydantic.TypeAdapter(RoutableObjectWithProvider).validate_json(json_str)
+        try:
+            return pydantic.TypeAdapter(RoutableObjectWithProvider).validate_json(json_str)
+        except pydantic.ValidationError as e:
+            logger.error(f"Error parsing registry value for {type}:{identifier}, raw value: {json_str}. Error: {e}")
+            return None

    async def update(self, obj: RoutableObjectWithProvider) -> None:
        await self.kvstore.set(
--- a/llama_stack/distribution/ui/Containerfile
+++ b/llama_stack/distribution/ui/Containerfile
@ -0,0 +1,11 @@
+# More info on playground configuration can be found here:
+# https://llama-stack.readthedocs.io/en/latest/playground
+
+FROM python:3.9-slim
+WORKDIR /app
+COPY . /app/
+RUN /usr/local/bin/python -m pip install --upgrade pip && \
+    /usr/local/bin/pip3 install -r requirements.txt
+EXPOSE 8501
+
+ENTRYPOINT ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
--- a/llama_stack/distribution/ui/README.md
+++ b/llama_stack/distribution/ui/README.md
@ -40,3 +40,13 @@ cd llama_stack/distribution/ui
 pip install -r requirements.txt
 streamlit run app.py
 ```
+
+## Environment Variables
+
+| Environment Variable       | Description                        | Default Value             |
+|----------------------------|------------------------------------|---------------------------|
+| LLAMA_STACK_ENDPOINT       | The endpoint for the Llama Stack   | http://localhost:8321     |
+| FIREWORKS_API_KEY          | API key for Fireworks provider     | (empty string)            |
+| TOGETHER_API_KEY           | API key for Together provider      | (empty string)            |
+| SAMBANOVA_API_KEY          | API key for SambaNova provider     | (empty string)            |
+| OPENAI_API_KEY             | API key for OpenAI provider        | (empty string)            |
--- a/llama_stack/distribution/ui/page/distribution/datasets.py
+++ b/llama_stack/distribution/ui/page/distribution/datasets.py
@ -5,7 +5,8 @@
 # the root directory of this source tree.

 import streamlit as st
-from modules.api import llama_stack_api
+
+from llama_stack.distribution.ui.modules.api import llama_stack_api


 def datasets():
--- a/llama_stack/distribution/ui/page/distribution/eval_tasks.py
+++ b/llama_stack/distribution/ui/page/distribution/eval_tasks.py
@ -5,7 +5,8 @@
 # the root directory of this source tree.

 import streamlit as st
-from modules.api import llama_stack_api
+
+from llama_stack.distribution.ui.modules.api import llama_stack_api


 def benchmarks():
--- a/llama_stack/distribution/ui/page/distribution/models.py
+++ b/llama_stack/distribution/ui/page/distribution/models.py
@ -5,7 +5,8 @@
 # the root directory of this source tree.

 import streamlit as st
-from modules.api import llama_stack_api
+
+from llama_stack.distribution.ui.modules.api import llama_stack_api


 def models():
--- a/llama_stack/distribution/ui/page/distribution/providers.py
+++ b/llama_stack/distribution/ui/page/distribution/providers.py
@ -5,7 +5,8 @@
 # the root directory of this source tree.

 import streamlit as st
-from modules.api import llama_stack_api
+
+from llama_stack.distribution.ui.modules.api import llama_stack_api


 def providers():
--- a/llama_stack/distribution/ui/page/distribution/resources.py
+++ b/llama_stack/distribution/ui/page/distribution/resources.py
@ -4,14 +4,15 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from page.distribution.benchmarks import benchmarks
-from page.distribution.datasets import datasets
-from page.distribution.models import models
-from page.distribution.scoring_functions import scoring_functions
-from page.distribution.shields import shields
-from page.distribution.vector_dbs import vector_dbs
 from streamlit_option_menu import option_menu

+from llama_stack.distribution.ui.page.distribution.datasets import datasets
+from llama_stack.distribution.ui.page.distribution.eval_tasks import benchmarks
+from llama_stack.distribution.ui.page.distribution.models import models
+from llama_stack.distribution.ui.page.distribution.scoring_functions import scoring_functions
+from llama_stack.distribution.ui.page.distribution.shields import shields
+from llama_stack.distribution.ui.page.distribution.vector_dbs import vector_dbs
+

 def resources_page():
    options = [
--- a/llama_stack/distribution/ui/page/distribution/scoring_functions.py
+++ b/llama_stack/distribution/ui/page/distribution/scoring_functions.py
@ -5,7 +5,8 @@
 # the root directory of this source tree.

 import streamlit as st
-from modules.api import llama_stack_api
+
+from llama_stack.distribution.ui.modules.api import llama_stack_api


 def scoring_functions():
--- a/llama_stack/distribution/ui/page/distribution/shields.py
+++ b/llama_stack/distribution/ui/page/distribution/shields.py
@ -5,7 +5,8 @@
 # the root directory of this source tree.

 import streamlit as st
-from modules.api import llama_stack_api
+
+from llama_stack.distribution.ui.modules.api import llama_stack_api


 def shields():
--- a/llama_stack/distribution/ui/page/distribution/vector_dbs.py
+++ b/llama_stack/distribution/ui/page/distribution/vector_dbs.py
@ -5,7 +5,8 @@
 # the root directory of this source tree.

 import streamlit as st
-from modules.api import llama_stack_api
+
+from llama_stack.distribution.ui.modules.api import llama_stack_api


 def vector_dbs():
--- a/llama_stack/distribution/ui/page/evaluations/app_eval.py
+++ b/llama_stack/distribution/ui/page/evaluations/app_eval.py
@ -8,8 +8,9 @@ import json

 import pandas as pd
 import streamlit as st
-from modules.api import llama_stack_api
-from modules.utils import process_dataset
+
+from llama_stack.distribution.ui.modules.api import llama_stack_api
+from llama_stack.distribution.ui.modules.utils import process_dataset


 def application_evaluation_page():
--- a/llama_stack/distribution/ui/page/evaluations/native_eval.py
+++ b/llama_stack/distribution/ui/page/evaluations/native_eval.py
@ -8,7 +8,8 @@ import json

 import pandas as pd
 import streamlit as st
-from modules.api import llama_stack_api
+
+from llama_stack.distribution.ui.modules.api import llama_stack_api


 def select_benchmark_1():
@ -166,11 +167,10 @@ def run_evaluation_3():
    eval_candidate = st.session_state["eval_candidate"]

    dataset_id = benchmarks[selected_benchmark].dataset_id
-    rows = llama_stack_api.client.datasetio.get_rows_paginated(
+    rows = llama_stack_api.client.datasets.iterrows(
        dataset_id=dataset_id,
-        rows_in_page=-1,
    )
-    total_rows = len(rows.rows)
+    total_rows = len(rows.data)
    # Add number of examples control
    num_rows = st.number_input(
        "Number of Examples to Evaluate",
@ -195,7 +195,7 @@ def run_evaluation_3():
    if st.button("Run Evaluation"):
        progress_text = "Running evaluation..."
        progress_bar = st.progress(0, text=progress_text)
-        rows = rows.rows
+        rows = rows.data
        if num_rows < total_rows:
            rows = rows[:num_rows]

--- a/llama_stack/distribution/ui/page/playground/chat.py
+++ b/llama_stack/distribution/ui/page/playground/chat.py
@ -5,7 +5,8 @@
 # the root directory of this source tree.

 import streamlit as st
-from modules.api import llama_stack_api
+
+from llama_stack.distribution.ui.modules.api import llama_stack_api

 # Sidebar configurations
 with st.sidebar:
--- a/llama_stack/distribution/ui/page/playground/rag.py
+++ b/llama_stack/distribution/ui/page/playground/rag.py
@ -5,11 +5,10 @@
 # the root directory of this source tree.

 import streamlit as st
-from llama_stack_client.lib.agents.agent import Agent
-from llama_stack_client.lib.agents.event_logger import EventLogger
-from llama_stack_client.types.memory_insert_params import Document
-from modules.api import llama_stack_api
-from modules.utils import data_url_from_file
+from llama_stack_client import Agent, AgentEventLogger, RAGDocument
+
+from llama_stack.distribution.ui.modules.api import llama_stack_api
+from llama_stack.distribution.ui.modules.utils import data_url_from_file


 def rag_chat_page():
@ -34,7 +33,7 @@ def rag_chat_page():
            )
            if st.button("Create Vector Database"):
                documents = [
-                    Document(
+                    RAGDocument(
                        document_id=uploaded_file.name,
                        content=data_url_from_file(uploaded_file),
                    )
@ -59,6 +58,7 @@ def rag_chat_page():
                llama_stack_api.client.tool_runtime.rag_tool.insert(
                    vector_db_id=vector_db_name,  # Use the user-provided name
                    documents=documents,
+                    chunk_size_in_tokens=512,
                )
                st.success("Vector database created successfully!")

@ -166,7 +166,7 @@ def rag_chat_page():
            message_placeholder = st.empty()
            full_response = ""
            retrieval_response = ""
-            for log in EventLogger().log(response):
+            for log in AgentEventLogger().log(response):
                log.print()
                if log.role == "tool_execution":
                    retrieval_response += log.content.replace("====", "").strip()
--- a/llama_stack/distribution/utils/chain.py
+++ b/llama_stack/distribution/utils/chain.py
@ -41,10 +41,10 @@ async def execute_preprocessor_chain(
    preprocessor_inputs: List[PreprocessingDataElement],
 ) -> PreprocessorResponse:
    if not validate_chain(preprocessor_chain_impls):
-        return PreprocessorResponse(success=False, results=[])
+        return PreprocessorResponse(success=False, output_data_type=None, results=[])

    current_inputs = preprocessor_inputs
-    current_outputs = []
+    current_outputs: List[PreprocessingDataElement] | None = []
    current_result_type = None

    # TODO: replace with a parallel implementation
@ -59,6 +59,9 @@ async def execute_preprocessor_chain(
            log.error(f"Preprocessor {current_params.preprocessor_id} returned an error")
            return PreprocessorResponse(success=False, output_data_type=response.output_data_type, results=[])
        current_outputs = response.results
+        if current_outputs is None:
+            log.error(f"Preprocessor {current_params.preprocessor_id} returned invalid results")
+            return PreprocessorResponse(success=False, output_data_type=response.output_data_type, results=[])
        current_inputs = current_outputs
        current_result_type = response.output_data_type

--- a/llama_stack/distribution/utils/context.py
+++ b/llama_stack/distribution/utils/context.py
@ -0,0 +1,37 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from contextvars import ContextVar
+from typing import AsyncGenerator, List, TypeVar
+
+T = TypeVar("T")
+
+
+def preserve_contexts_async_generator(
+    gen: AsyncGenerator[T, None], context_vars: List[ContextVar]
+) -> AsyncGenerator[T, None]:
+    """
+    Wraps an async generator to preserve context variables across iterations.
+    This is needed because we start a new asyncio event loop for each streaming request,
+    and we need to preserve the context across the event loop boundary.
+
+    Args:
+        gen: The async generator to wrap.
+        context_vars: Context variables whose current values are captured when this
+            function is called and re-applied before each item is pulled from ``gen``.
+
+    Returns:
+        An async generator yielding the same items as ``gen``.
+    """
+    # Capture initial context values, paired with the variable itself: ContextVar names are
+    # not guaranteed to be unique, so keying by name could restore the wrong value.
+    initial_context_values = [(context_var, context_var.get()) for context_var in context_vars]
+
+    async def wrapper() -> AsyncGenerator[T, None]:
+        while True:
+            # Restore context values before any await
+            for context_var, value in initial_context_values:
+                context_var.set(value)
+
+            try:
+                item = await gen.__anext__()
+            except StopAsyncIteration:
+                break
+
+            yield item
+
+    return wrapper()
--- a/llama_stack/distribution/utils/exec.py
+++ b/llama_stack/distribution/utils/exec.py
@ -4,13 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-import errno
 import logging
 import os
-import select
 import signal
 import subprocess
-import sys

 from termcolor import cprint

@ -88,13 +85,6 @@ def formulate_run_args(image_type, image_name, config, template_name) -> list:
    return run_args


-def run_with_pty(command):
-    if sys.platform.startswith("win"):
-        return _run_with_pty_win(command)
-    else:
-        return _run_with_pty_unix(command)
-
-
 def in_notebook():
    try:
        from IPython import get_ipython
@ -108,19 +98,19 @@ def in_notebook():
    return True


-# run a command in a pseudo-terminal, with interrupt handling,
-# useful when you want to run interactive things
-def _run_with_pty_unix(command):
-    import pty
-    import termios
+def run_command(command: list[str]) -> int:
+    """
+    Run a command with interrupt handling and output capture.
+    Uses subprocess.run with direct stream piping for better performance.

-    master, slave = pty.openpty()
+    Args:
+        command (list): The command to run.

-    old_settings = termios.tcgetattr(sys.stdin)
+    Returns:
+        int: The return code of the command.
+    """
    original_sigint = signal.getsignal(signal.SIGINT)
-
    ctrl_c_pressed = False
-    process = None

    def sigint_handler(signum, frame):
        nonlocal ctrl_c_pressed
@ -131,106 +121,19 @@ def _run_with_pty_unix(command):
        # Set up the signal handler
        signal.signal(signal.SIGINT, sigint_handler)

-        new_settings = termios.tcgetattr(sys.stdin)
-        new_settings[3] = new_settings[3] & ~termios.ECHO  # Disable echo
-        new_settings[3] = new_settings[3] & ~termios.ICANON  # Disable canonical mode
-        termios.tcsetattr(sys.stdin, termios.TCSADRAIN, new_settings)
-
-        process = subprocess.Popen(
+        # Run the command with stdout/stderr piped directly to system streams
+        result = subprocess.run(
            command,
-            stdin=slave,
-            stdout=slave,
-            stderr=slave,
-            universal_newlines=True,
-            preexec_fn=os.setsid,
+            text=True,
+            check=False,
        )
-
-        # Close the slave file descriptor as it's now owned by the subprocess
-        os.close(slave)
-
-        def handle_io():
-            while not ctrl_c_pressed:
-                try:
-                    rlist, _, _ = select.select([sys.stdin, master], [], [], 0.1)
-
-                    if sys.stdin in rlist:
-                        data = os.read(sys.stdin.fileno(), 1024)
-                        if not data:
-                            break
-                        os.write(master, data)
-
-                    if master in rlist:
-                        data = os.read(master, 1024)
-                        if not data:
-                            break
-                        sys.stdout.buffer.write(data)
-                        sys.stdout.flush()
-
-                except KeyboardInterrupt:
-                    # This will be raised when Ctrl+C is pressed
-                    break
-
-                if process.poll() is not None:
-                    break
-
-        handle_io()
-    except (EOFError, KeyboardInterrupt):
-        pass
-    except OSError as e:
-        if e.errno != errno.EIO:
-            raise
-    finally:
-        # Clean up
-        termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_settings)
-        signal.signal(signal.SIGINT, original_sigint)
-
-        os.close(master)
-        if process and process.poll() is None:
-            process.terminate()
-            process.wait()
-
-    return process.returncode
-
-
-# run a command in a pseudo-terminal in windows, with interrupt handling,
-def _run_with_pty_win(command):
-    """
-    Runs a command with interactive support using subprocess directly.
-    """
-    try:
-        # For shell scripts on Windows, use appropriate shell
-        if isinstance(command, (list, tuple)):
-            if command[0].endswith(".sh"):
-                if os.path.exists("/usr/bin/bash"):  # WSL
-                    command = ["bash"] + command
-                else:
-                    # Use cmd.exe with bash while preserving all arguments
-                    command = ["cmd.exe", "/c", "bash"] + command
-
-        process = subprocess.Popen(
-            command,
-            shell=True,
-            universal_newlines=True,
-        )
-
-        process.wait()
-
+        return result.returncode
+    except subprocess.SubprocessError as e:
+        log.error(f"Subprocess error: {e}")
+        return 1
    except Exception as e:
-        print(f"Error: {str(e)}")
+        log.exception(f"Unexpected error: {e}")
        return 1
    finally:
-        if process and process.poll() is None:
-            process.terminate()
-            process.wait()
-    return process.returncode
-
-
-def run_command(command):
-    try:
-        result = subprocess.run(command, capture_output=True, text=True, check=True)
-        print("Script Output\n", result.stdout)
-        return result.returncode
-    except subprocess.CalledProcessError as e:
-        print("Error running script:", e)
-        print("Error output:", e.stderr)
-        return e.returncode
+        # Restore the original signal handler
+        signal.signal(signal.SIGINT, original_sigint)
--- a/llama_stack/distribution/utils/tests/test_context.py
+++ b/llama_stack/distribution/utils/tests/test_context.py
@ -0,0 +1,155 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import asyncio
+from concurrent.futures import ThreadPoolExecutor
+from contextvars import ContextVar
+
+import pytest
+
+from llama_stack.distribution.utils.context import preserve_contexts_async_generator
+
+
+@pytest.mark.asyncio
+async def test_preserve_contexts_with_exception():
+    """An exception raised inside the wrapped generator reaches the consumer."""
+    # Context variable holding a non-default value for the duration of the test
+    tracked_var = ContextVar("exception_var", default="initial")
+    reset_token = tracked_var.set("start_value")
+
+    async def failing_generator():
+        # Yield once, change the variable, then fail
+        yield tracked_var.get()
+        tracked_var.set("modified")
+        raise ValueError("Test exception")
+        yield None  # unreachable
+
+    wrapped = preserve_contexts_async_generator(failing_generator(), [tracked_var])
+
+    # The first item is produced normally
+    first_item = await wrapped.__anext__()
+    assert first_item == "start_value"
+
+    # Advancing again surfaces the generator's ValueError
+    with pytest.raises(ValueError, match="Test exception"):
+        await wrapped.__anext__()
+
+    # Undo the set() performed at the top of the test
+    tracked_var.reset(reset_token)
+
+
+@pytest.mark.asyncio
+async def test_preserve_contexts_empty_generator():
+    """Wrapping a generator that yields nothing stops on the very first step."""
+    # Context variable holding a non-default value for the duration of the test
+    tracked_var = ContextVar("empty_var", default="initial")
+    reset_token = tracked_var.set("value")
+
+    async def yields_nothing():
+        # The unreachable yield only serves to make this an async generator
+        if False:
+            yield None
+
+    wrapped = preserve_contexts_async_generator(yields_nothing(), [tracked_var])
+
+    # Exhaustion is reported on the first step
+    with pytest.raises(StopAsyncIteration):
+        await wrapped.__anext__()
+
+    # The value set above is still in effect
+    assert tracked_var.get() == "value"
+
+    # Undo the set() performed at the top of the test
+    tracked_var.reset(reset_token)
+
+
+@pytest.mark.asyncio
+async def test_preserve_contexts_across_event_loops():
+    """
+    Check context preservation through nested generators driven by a new event loop.
+
+    Scenario:
+    1. A worker thread creates its own event loop
+    2. An outer coroutine sets the context variables inside that loop
+    3. A wrapped middle generator relays items from an inner generator
+    4. Each yielded item records the variable values visible at that point
+    5. The collected items are verified back in the test coroutine
+    """
+    # Context variables under test
+    request_id = ContextVar("request_id", default=None)
+    user_id = ContextVar("user_id", default=None)
+
+    # Values are assigned inside outer_generator, i.e. within the worker
+    # thread's event loop rather than in this test coroutine
+
+    # Items collected in the worker thread, checked once it has finished
+    results = []
+
+    # Level 2: innermost generator
+    async def inner_generator():
+        # Reports the values current at its first step
+        yield (1, request_id.get(), user_id.get())
+
+        # Change one variable between the two yields
+        user_id.set("user-modified")
+
+        # Reports the changed user_id
+        yield (2, request_id.get(), user_id.get())
+
+    # Level 1: relays the inner items, then adds one of its own
+    async def middle_generator():
+        inner_gen = inner_generator()
+
+        # Relay both inner items, one step at a time
+        for _ in range(2):
+            item = await inner_gen.__anext__()
+            yield item
+
+        request_id.set("req-modified")
+
+        # Final item, produced after request_id has changed as well
+        yield (3, request_id.get(), user_id.get())
+
+    # Runs on the worker thread and owns a fresh event loop for the whole chain
+    def run_in_new_loop():
+        worker_loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(worker_loop)
+
+        # Outer coroutine, executed by the worker loop
+        async def outer_generator():
+            # Assign the values first, then wrap the middle generator
+            request_id.set("req-12345")
+            user_id.set("user-6789")
+            wrapped_gen = preserve_contexts_async_generator(middle_generator(), [request_id, user_id])
+
+            # Drain the wrapped generator, keeping every item for verification
+            async for item in wrapped_gen:
+                results.append(item)
+
+        try:
+            worker_loop.run_until_complete(outer_generator())
+        finally:
+            worker_loop.close()
+
+    # Execute the chain on a single worker thread; result() waits for it and
+    # re-raises anything the worker raised
+    with ThreadPoolExecutor(max_workers=1) as executor:
+        future = executor.submit(run_in_new_loop)
+        future.result()
+
+    # All three items must have been collected
+    assert len(results) == 3
+
+    first, second, third = results
+
+    # Item 1 carries the values set by outer_generator
+    assert first == (1, "req-12345", "user-6789")
+
+    # Item 2 reflects the user_id change made by the inner generator
+    assert second == (2, "req-12345", "user-modified")
+
+    # Item 3 is expected to reflect both changes
+    assert third == (3, "req-modified", "user-modified")