diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
index 4f1c143d2..99e0d0043 100644
--- a/.github/workflows/pre-commit.yml
+++ b/.github/workflows/pre-commit.yml
@@ -36,6 +36,21 @@ jobs:
**/requirements*.txt
.pre-commit-config.yaml
+      # npm ci may fail with errors like -
+ # npm error `npm ci` can only install packages when your package.json and package-lock.json or npm-shrinkwrap.json are in sync. Please update your lock file with `npm install` before continuing.
+ # npm error Invalid: lock file's llama-stack-client@0.2.17 does not satisfy llama-stack-client@0.2.18
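+      # (per the npm error above, running `npm install` in llama_stack/ui refreshes the lock file)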
+
+ # - name: Set up Node.js
+ # uses: actions/setup-node@39370e3970a6d050c480ffad4ff0ed4d3fdee5af # v4.1.0
+ # with:
+ # node-version: '20'
+ # cache: 'npm'
+ # cache-dependency-path: 'llama_stack/ui/'
+
+ # - name: Install npm dependencies
+ # run: npm ci
+ # working-directory: llama_stack/ui
+
- uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
continue-on-error: true
env:
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 4309f289a..d25455cf0 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -146,20 +146,50 @@ repos:
pass_filenames: false
require_serial: true
files: ^.github/workflows/.*$
- - id: ui-prettier
- name: Format UI code with Prettier
- entry: bash -c 'cd llama_stack/ui && npm run format'
+  # ui-prettier and ui-eslint are disabled until we can avoid `npm ci`, which is slow and may fail with errors like -
+ # npm error `npm ci` can only install packages when your package.json and package-lock.json or npm-shrinkwrap.json are in sync. Please update your lock file with `npm install` before continuing.
+ # npm error Invalid: lock file's llama-stack-client@0.2.17 does not satisfy llama-stack-client@0.2.18
+  # and until we have infra for installing prettier and next via npm; without that, the hooks fail with -
+ # Lint UI code with ESLint.....................................................Failed
+ # - hook id: ui-eslint
+ # - exit code: 127
+ # > ui@0.1.0 lint
+ # > next lint --fix --quiet
+ # sh: line 1: next: command not found
+ #
+ # - id: ui-prettier
+ # name: Format UI code with Prettier
+ # entry: bash -c 'cd llama_stack/ui && npm ci && npm run format'
+ # language: system
+ # files: ^llama_stack/ui/.*\.(ts|tsx)$
+ # pass_filenames: false
+ # require_serial: true
+ # - id: ui-eslint
+ # name: Lint UI code with ESLint
+ # entry: bash -c 'cd llama_stack/ui && npm run lint -- --fix --quiet'
+ # language: system
+ # files: ^llama_stack/ui/.*\.(ts|tsx)$
+ # pass_filenames: false
+ # require_serial: true
+
+ - id: check-log-usage
+    name: Ensure logging goes through 'llama_stack.log'
+ entry: bash
language: system
- files: ^llama_stack/ui/.*\.(ts|tsx)$
- pass_filenames: false
- require_serial: true
- - id: ui-eslint
- name: Lint UI code with ESLint
- entry: bash -c 'cd llama_stack/ui && npm run lint -- --fix --quiet'
- language: system
- files: ^llama_stack/ui/.*\.(ts|tsx)$
- pass_filenames: false
- require_serial: true
+ types: [python]
+ pass_filenames: true
+ args:
+ - -c
+ - |
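+        # Flag non-comment uses of the stdlib logging module that lack the allow-direct-logging marker.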
+        matches=$(grep -EnH '^[^#]*\b(import\s+logging\b|from\s+logging\b)' "$@" | grep -v -e '#\s*allow-direct-logging' || true)
+ if [ -n "$matches" ]; then
+ # GitHub Actions annotation format
+ while IFS=: read -r file line_num rest; do
+ echo "::error file=$file,line=$line_num::Do not use 'import logging' or 'from logging import' in $file. Use the custom log instead: from llama_stack.log import get_logger; logger = get_logger(). If direct logging is truly needed, add: # allow-direct-logging"
+ done <<< "$matches"
+ exit 1
+ fi
+        exit 0
+      - check-log-usage  # placeholder for $0: `bash -c` assigns the first extra arg to $0, so without this the first filename would be dropped from "$@"
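+    # Expected pattern (as used throughout this repo):
+    #   from llama_stack.log import get_logger
+    #   logger = get_logger(name=__name__, category="core")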
ci:
autofix_commit_msg: 🎨 [pre-commit.ci] Auto format from pre-commit.com hooks
diff --git a/docs/source/distributions/k8s-benchmark/benchmark.py b/docs/source/distributions/k8s-benchmark/benchmark.py
index 0e7368431..3d0d18150 100644
--- a/docs/source/distributions/k8s-benchmark/benchmark.py
+++ b/docs/source/distributions/k8s-benchmark/benchmark.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
diff --git a/llama_stack/core/build.py b/llama_stack/core/build.py
index 4b20588fd..fa1fe632b 100644
--- a/llama_stack/core/build.py
+++ b/llama_stack/core/build.py
@@ -5,7 +5,6 @@
# the root directory of this source tree.
import importlib.resources
-import logging
import sys
from pydantic import BaseModel
@@ -17,9 +16,10 @@ from llama_stack.core.external import load_external_apis
from llama_stack.core.utils.exec import run_command
from llama_stack.core.utils.image_types import LlamaStackImageType
from llama_stack.distributions.template import DistributionTemplate
+from llama_stack.log import get_logger
from llama_stack.providers.datatypes import Api
-log = logging.getLogger(__name__)
+log = get_logger(name=__name__, category="core")
# These are the dependencies needed by the distribution server.
# `llama-stack` is automatically installed by the installation script.
diff --git a/llama_stack/core/configure.py b/llama_stack/core/configure.py
index 9e18b438c..64473c053 100644
--- a/llama_stack/core/configure.py
+++ b/llama_stack/core/configure.py
@@ -3,7 +3,6 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-import logging
import textwrap
from typing import Any
@@ -21,9 +20,10 @@ from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars
from llama_stack.core.utils.config_dirs import EXTERNAL_PROVIDERS_DIR
from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.core.utils.prompt_for_config import prompt_for_config
+from llama_stack.log import get_logger
from llama_stack.providers.datatypes import Api, ProviderSpec
-logger = logging.getLogger(__name__)
+logger = get_logger(name=__name__, category="core")
def configure_single_provider(registry: dict[str, ProviderSpec], provider: Provider) -> Provider:
diff --git a/llama_stack/core/library_client.py b/llama_stack/core/library_client.py
index a93fe509e..dd1fc8a50 100644
--- a/llama_stack/core/library_client.py
+++ b/llama_stack/core/library_client.py
@@ -7,7 +7,7 @@
import asyncio
import inspect
import json
-import logging
+import logging # allow-direct-logging
import os
import sys
from concurrent.futures import ThreadPoolExecutor
@@ -48,6 +48,7 @@ from llama_stack.core.stack import (
from llama_stack.core.utils.config import redact_sensitive_fields
from llama_stack.core.utils.context import preserve_contexts_async_generator
from llama_stack.core.utils.exec import in_notebook
+from llama_stack.log import get_logger
from llama_stack.providers.utils.telemetry.tracing import (
CURRENT_TRACE_CONTEXT,
end_trace,
@@ -55,7 +56,7 @@ from llama_stack.providers.utils.telemetry.tracing import (
start_trace,
)
-logger = logging.getLogger(__name__)
+logger = get_logger(name=__name__, category="core")
T = TypeVar("T")
diff --git a/llama_stack/core/request_headers.py b/llama_stack/core/request_headers.py
index 35ac72775..f1ce8281f 100644
--- a/llama_stack/core/request_headers.py
+++ b/llama_stack/core/request_headers.py
@@ -6,15 +6,15 @@
import contextvars
import json
-import logging
from contextlib import AbstractContextManager
from typing import Any
from llama_stack.core.datatypes import User
+from llama_stack.log import get_logger
from .utils.dynamic import instantiate_class_type
-log = logging.getLogger(__name__)
+log = get_logger(name=__name__, category="core")
# Context variable for request provider data and auth attributes
PROVIDER_DATA_VAR = contextvars.ContextVar("provider_data", default=None)
diff --git a/llama_stack/core/server/server.py b/llama_stack/core/server/server.py
index cbef8ef88..3d94b6e81 100644
--- a/llama_stack/core/server/server.py
+++ b/llama_stack/core/server/server.py
@@ -9,7 +9,7 @@ import asyncio
import functools
import inspect
import json
-import logging
+import logging # allow-direct-logging
import os
import ssl
import sys
diff --git a/llama_stack/core/utils/exec.py b/llama_stack/core/utils/exec.py
index 1b2b782fe..12fb82d01 100644
--- a/llama_stack/core/utils/exec.py
+++ b/llama_stack/core/utils/exec.py
@@ -4,7 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-import logging
+import importlib
import os
import signal
import subprocess
@@ -12,9 +12,9 @@ import sys
from termcolor import cprint
-log = logging.getLogger(__name__)
+from llama_stack.log import get_logger
-import importlib
+log = get_logger(name=__name__, category="core")
def formulate_run_args(image_type: str, image_name: str) -> list:
diff --git a/llama_stack/core/utils/prompt_for_config.py b/llama_stack/core/utils/prompt_for_config.py
index 26f6920e0..bac0531ed 100644
--- a/llama_stack/core/utils/prompt_for_config.py
+++ b/llama_stack/core/utils/prompt_for_config.py
@@ -6,7 +6,6 @@
import inspect
import json
-import logging
from enum import Enum
from typing import Annotated, Any, Literal, Union, get_args, get_origin
@@ -14,7 +13,9 @@ from pydantic import BaseModel
from pydantic.fields import FieldInfo
from pydantic_core import PydanticUndefinedType
-log = logging.getLogger(__name__)
+from llama_stack.log import get_logger
+
+log = get_logger(name=__name__, category="core")
def is_list_of_primitives(field_type):
diff --git a/llama_stack/log.py b/llama_stack/log.py
index d67bd1b61..cc4c9d4cf 100644
--- a/llama_stack/log.py
+++ b/llama_stack/log.py
@@ -4,10 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-import logging
+import logging # allow-direct-logging
import os
import re
-from logging.config import dictConfig
+from logging.config import dictConfig # allow-direct-logging
from rich.console import Console
from rich.errors import MarkupError
diff --git a/llama_stack/models/llama/llama3/multimodal/encoder_utils.py b/llama_stack/models/llama/llama3/multimodal/encoder_utils.py
index 5b5969d89..90ced13b2 100644
--- a/llama_stack/models/llama/llama3/multimodal/encoder_utils.py
+++ b/llama_stack/models/llama/llama3/multimodal/encoder_utils.py
@@ -13,14 +13,15 @@
# Copyright (c) Meta Platforms, Inc. and its affiliates.
import math
-from logging import getLogger
import torch
import torch.nn.functional as F
+from llama_stack.log import get_logger
+
from .utils import get_negative_inf_value, to_2tuple
-logger = getLogger()
+logger = get_logger(name=__name__, category="models::llama")
def resize_local_position_embedding(orig_pos_embed, grid_size):
diff --git a/llama_stack/models/llama/llama3/multimodal/image_transform.py b/llama_stack/models/llama/llama3/multimodal/image_transform.py
index f2761ee47..7b20a31fa 100644
--- a/llama_stack/models/llama/llama3/multimodal/image_transform.py
+++ b/llama_stack/models/llama/llama3/multimodal/image_transform.py
@@ -13,7 +13,6 @@
import math
from collections import defaultdict
-from logging import getLogger
from typing import Any
import torch
@@ -21,9 +20,11 @@ import torchvision.transforms as tv
from PIL import Image
from torchvision.transforms import functional as F
+from llama_stack.log import get_logger
+
IMAGE_RES = 224
-logger = getLogger()
+logger = get_logger(name=__name__, category="models::llama")
class VariableSizeImageTransform:
diff --git a/llama_stack/models/llama/llama3/multimodal/model.py b/llama_stack/models/llama/llama3/multimodal/model.py
index 5f1c3605c..096156a5f 100644
--- a/llama_stack/models/llama/llama3/multimodal/model.py
+++ b/llama_stack/models/llama/llama3/multimodal/model.py
@@ -3,8 +3,6 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-
-import logging
import math
from collections.abc import Callable
from functools import partial
@@ -22,6 +20,8 @@ from PIL import Image as PIL_Image
from torch import Tensor, nn
from torch.distributed import _functional_collectives as funcol
+from llama_stack.log import get_logger
+
from ..model import ModelArgs, RMSNorm, apply_rotary_emb, precompute_freqs_cis
from .encoder_utils import (
build_encoder_attention_mask,
@@ -34,9 +34,10 @@ from .encoder_utils import (
from .image_transform import VariableSizeImageTransform
from .utils import get_negative_inf_value, to_2tuple
-logger = logging.getLogger(__name__)
MP_SCALE = 8
+logger = get_logger(name=__name__, category="models")
+
def reduce_from_tensor_model_parallel_region(input_):
"""All-reduce the input tensor across model parallel group."""
@@ -771,7 +772,7 @@ class TilePositionEmbedding(nn.Module):
if embed is not None:
# reshape the weights to the correct shape
nt_old, nt_old, _, w = embed.shape
- logging.info(f"Resizing tile embedding from {nt_old}x{nt_old} to {self.num_tiles}x{self.num_tiles}")
+ logger.info(f"Resizing tile embedding from {nt_old}x{nt_old} to {self.num_tiles}x{self.num_tiles}")
embed_new = TilePositionEmbedding._dynamic_resize(embed, self.num_tiles)
# assign the weights to the module
state_dict[prefix + "embedding"] = embed_new
diff --git a/llama_stack/models/llama/llama3/tokenizer.py b/llama_stack/models/llama/llama3/tokenizer.py
index e47b579e3..ad7ced1c5 100644
--- a/llama_stack/models/llama/llama3/tokenizer.py
+++ b/llama_stack/models/llama/llama3/tokenizer.py
@@ -4,8 +4,8 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
+
from collections.abc import Collection, Iterator, Sequence, Set
-from logging import getLogger
from pathlib import Path
from typing import (
Literal,
@@ -14,11 +14,9 @@ from typing import (
import tiktoken
+from llama_stack.log import get_logger
from llama_stack.models.llama.tokenizer_utils import load_bpe_file
-logger = getLogger(__name__)
-
-
# The tiktoken tokenizer can handle <=400k chars without
# pyo3_runtime.PanicException.
TIKTOKEN_MAX_ENCODE_CHARS = 400_000
@@ -31,6 +29,8 @@ MAX_NO_WHITESPACES_CHARS = 25_000
_INSTANCE = None
+logger = get_logger(name=__name__, category="models::llama")
+
class Tokenizer:
"""
diff --git a/llama_stack/models/llama/llama4/quantization/loader.py b/llama_stack/models/llama/llama4/quantization/loader.py
index 223744a5f..8220a9040 100644
--- a/llama_stack/models/llama/llama4/quantization/loader.py
+++ b/llama_stack/models/llama/llama4/quantization/loader.py
@@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-import logging
import os
from collections.abc import Callable
@@ -13,11 +12,13 @@ from fairscale.nn.model_parallel.initialize import get_model_parallel_rank
from torch import Tensor, nn
from torch.nn import functional as F
+from llama_stack.log import get_logger
+
from ...datatypes import QuantizationMode
from ..model import Transformer, TransformerBlock
from ..moe import MoE
-log = logging.getLogger(__name__)
+log = get_logger(name=__name__, category="models")
def swiglu_wrapper_no_reduce(
diff --git a/llama_stack/models/llama/llama4/tokenizer.py b/llama_stack/models/llama/llama4/tokenizer.py
index e12b2cae0..bfbace8f9 100644
--- a/llama_stack/models/llama/llama4/tokenizer.py
+++ b/llama_stack/models/llama/llama4/tokenizer.py
@@ -5,7 +5,6 @@
# the root directory of this source tree.
from collections.abc import Collection, Iterator, Sequence, Set
-from logging import getLogger
from pathlib import Path
from typing import (
Literal,
@@ -14,11 +13,9 @@ from typing import (
import tiktoken
+from llama_stack.log import get_logger
from llama_stack.models.llama.tokenizer_utils import load_bpe_file
-logger = getLogger(__name__)
-
-
# The tiktoken tokenizer can handle <=400k chars without
# pyo3_runtime.PanicException.
TIKTOKEN_MAX_ENCODE_CHARS = 400_000
@@ -101,6 +98,8 @@ BASIC_SPECIAL_TOKENS = [
"<|fim_suffix|>",
]
+logger = get_logger(name=__name__, category="models::llama")
+
class Tokenizer:
"""
diff --git a/llama_stack/models/llama/quantize_impls.py b/llama_stack/models/llama/quantize_impls.py
index a6400c5c9..7fab2d3a6 100644
--- a/llama_stack/models/llama/quantize_impls.py
+++ b/llama_stack/models/llama/quantize_impls.py
@@ -6,9 +6,10 @@
# type: ignore
import collections
-import logging
-log = logging.getLogger(__name__)
+from llama_stack.log import get_logger
+
+log = get_logger(name=__name__, category="llama")
try:
import fbgemm_gpu.experimental.gen_ai # noqa: F401
diff --git a/llama_stack/providers/inline/agents/meta_reference/agents.py b/llama_stack/providers/inline/agents/meta_reference/agents.py
index 30196c429..5794ad2c0 100644
--- a/llama_stack/providers/inline/agents/meta_reference/agents.py
+++ b/llama_stack/providers/inline/agents/meta_reference/agents.py
@@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-import logging
import uuid
from collections.abc import AsyncGenerator
from datetime import UTC, datetime
@@ -42,6 +41,7 @@ from llama_stack.apis.safety import Safety
from llama_stack.apis.tools import ToolGroups, ToolRuntime
from llama_stack.apis.vector_io import VectorIO
from llama_stack.core.datatypes import AccessRule
+from llama_stack.log import get_logger
from llama_stack.providers.utils.kvstore import InmemoryKVStoreImpl, kvstore_impl
from llama_stack.providers.utils.pagination import paginate_records
from llama_stack.providers.utils.responses.responses_store import ResponsesStore
@@ -51,7 +51,7 @@ from .config import MetaReferenceAgentsImplConfig
from .persistence import AgentInfo
from .responses.openai_responses import OpenAIResponsesImpl
-logger = logging.getLogger()
+logger = get_logger(name=__name__, category="agents")
class MetaReferenceAgentsImpl(Agents):
diff --git a/llama_stack/providers/inline/agents/meta_reference/persistence.py b/llama_stack/providers/inline/agents/meta_reference/persistence.py
index 0b234d96c..c19051f86 100644
--- a/llama_stack/providers/inline/agents/meta_reference/persistence.py
+++ b/llama_stack/providers/inline/agents/meta_reference/persistence.py
@@ -5,7 +5,6 @@
# the root directory of this source tree.
import json
-import logging
import uuid
from datetime import UTC, datetime
@@ -15,9 +14,10 @@ from llama_stack.core.access_control.access_control import AccessDeniedError, is
from llama_stack.core.access_control.datatypes import AccessRule
from llama_stack.core.datatypes import User
from llama_stack.core.request_headers import get_authenticated_user
+from llama_stack.log import get_logger
from llama_stack.providers.utils.kvstore import KVStore
-log = logging.getLogger(__name__)
+log = get_logger(name=__name__, category="agents")
class AgentSessionInfo(Session):
diff --git a/llama_stack/providers/inline/agents/meta_reference/safety.py b/llama_stack/providers/inline/agents/meta_reference/safety.py
index 605f387b7..b8a5d8a95 100644
--- a/llama_stack/providers/inline/agents/meta_reference/safety.py
+++ b/llama_stack/providers/inline/agents/meta_reference/safety.py
@@ -5,13 +5,13 @@
# the root directory of this source tree.
import asyncio
-import logging
from llama_stack.apis.inference import Message
from llama_stack.apis.safety import Safety, SafetyViolation, ViolationLevel
+from llama_stack.log import get_logger
from llama_stack.providers.utils.telemetry import tracing
-log = logging.getLogger(__name__)
+log = get_logger(name=__name__, category="agents")
class SafetyException(Exception): # noqa: N818
diff --git a/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py b/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py
index 7ade75032..bb6a1bd03 100644
--- a/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py
+++ b/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py
@@ -12,7 +12,6 @@
import copy
import json
-import logging
import multiprocessing
import os
import tempfile
@@ -32,13 +31,14 @@ from fairscale.nn.model_parallel.initialize import (
from pydantic import BaseModel, Field
from torch.distributed.launcher.api import LaunchConfig, elastic_launch
+from llama_stack.log import get_logger
from llama_stack.models.llama.datatypes import GenerationResult
from llama_stack.providers.utils.inference.prompt_adapter import (
ChatCompletionRequestWithRawContent,
CompletionRequestWithRawContent,
)
-log = logging.getLogger(__name__)
+log = get_logger(name=__name__, category="inference")
class ProcessingMessageName(str, Enum):
diff --git a/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py b/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py
index fea8a8189..600a5bd37 100644
--- a/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py
+++ b/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py
@@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-import logging
from collections.abc import AsyncGenerator
from llama_stack.apis.inference import (
@@ -21,6 +20,7 @@ from llama_stack.apis.inference import (
ToolPromptFormat,
)
from llama_stack.apis.models import ModelType
+from llama_stack.log import get_logger
from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate
from llama_stack.providers.utils.inference.embedding_mixin import (
SentenceTransformerEmbeddingMixin,
@@ -32,7 +32,7 @@ from llama_stack.providers.utils.inference.openai_compat import (
from .config import SentenceTransformersInferenceConfig
-log = logging.getLogger(__name__)
+log = get_logger(name=__name__, category="inference")
class SentenceTransformersInferenceImpl(
diff --git a/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py b/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py
index 2574b995b..d9ee3d2a8 100644
--- a/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py
+++ b/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py
@@ -6,7 +6,6 @@
import gc
import json
-import logging
import multiprocessing
from pathlib import Path
from typing import Any
@@ -28,6 +27,7 @@ from llama_stack.apis.post_training import (
LoraFinetuningConfig,
TrainingConfig,
)
+from llama_stack.log import get_logger
from llama_stack.providers.inline.post_training.common.utils import evacuate_model_from_device
from ..config import HuggingFacePostTrainingConfig
@@ -44,7 +44,7 @@ from ..utils import (
split_dataset,
)
-logger = logging.getLogger(__name__)
+logger = get_logger(name=__name__, category="post_training")
class HFFinetuningSingleDevice:
diff --git a/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py b/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py
index a7c19faac..b39a24c66 100644
--- a/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py
+++ b/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py
@@ -5,7 +5,6 @@
# the root directory of this source tree.
import gc
-import logging
import multiprocessing
from pathlib import Path
from typing import Any
@@ -24,6 +23,7 @@ from llama_stack.apis.post_training import (
DPOAlignmentConfig,
TrainingConfig,
)
+from llama_stack.log import get_logger
from llama_stack.providers.inline.post_training.common.utils import evacuate_model_from_device
from ..config import HuggingFacePostTrainingConfig
@@ -40,7 +40,7 @@ from ..utils import (
split_dataset,
)
-logger = logging.getLogger(__name__)
+logger = get_logger(name=__name__, category="post_training")
class HFDPOAlignmentSingleDevice:
diff --git a/llama_stack/providers/inline/post_training/huggingface/utils.py b/llama_stack/providers/inline/post_training/huggingface/utils.py
index 3147c19ab..f229c87dd 100644
--- a/llama_stack/providers/inline/post_training/huggingface/utils.py
+++ b/llama_stack/providers/inline/post_training/huggingface/utils.py
@@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-import logging
import os
import signal
import sys
@@ -19,10 +18,11 @@ from transformers import AutoConfig, AutoModelForCausalLM
from llama_stack.apis.datasetio import DatasetIO
from llama_stack.apis.post_training import Checkpoint, TrainingConfig
+from llama_stack.log import get_logger
from .config import HuggingFacePostTrainingConfig
-logger = logging.getLogger(__name__)
+logger = get_logger(name=__name__, category="post_training")
def setup_environment():
diff --git a/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py b/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py
index 49e1c95b8..8b1462862 100644
--- a/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py
+++ b/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py
@@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-import logging
import os
import time
from datetime import UTC, datetime
@@ -19,6 +18,7 @@ from torch.utils.data import DataLoader, DistributedSampler
from torchtune import modules, training
from torchtune import utils as torchtune_utils
from torchtune.data import padded_collate_sft
+from torchtune.models.llama3._tokenizer import Llama3Tokenizer
from torchtune.modules.loss import CEWithChunkedOutputLoss
from torchtune.modules.peft import (
get_adapter_params,
@@ -45,6 +45,7 @@ from llama_stack.apis.post_training import (
)
from llama_stack.core.utils.config_dirs import DEFAULT_CHECKPOINT_DIR
from llama_stack.core.utils.model_utils import model_local_dir
+from llama_stack.log import get_logger
from llama_stack.models.llama.sku_list import resolve_model
from llama_stack.providers.inline.post_training.common.utils import evacuate_model_from_device
from llama_stack.providers.inline.post_training.torchtune.common import utils
@@ -56,9 +57,7 @@ from llama_stack.providers.inline.post_training.torchtune.config import (
)
from llama_stack.providers.inline.post_training.torchtune.datasets.sft import SFTDataset
-log = logging.getLogger(__name__)
-
-from torchtune.models.llama3._tokenizer import Llama3Tokenizer
+log = get_logger(name=__name__, category="post_training")
class LoraFinetuningSingleDevice:
diff --git a/llama_stack/providers/inline/safety/code_scanner/code_scanner.py b/llama_stack/providers/inline/safety/code_scanner/code_scanner.py
index 6e05d5b83..5e25c559f 100644
--- a/llama_stack/providers/inline/safety/code_scanner/code_scanner.py
+++ b/llama_stack/providers/inline/safety/code_scanner/code_scanner.py
@@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-import logging
import uuid
from typing import TYPE_CHECKING, Any
@@ -20,13 +19,14 @@ from llama_stack.apis.safety import (
)
from llama_stack.apis.safety.safety import ModerationObject, ModerationObjectResults
from llama_stack.apis.shields import Shield
+from llama_stack.log import get_logger
from llama_stack.providers.utils.inference.prompt_adapter import (
interleaved_content_as_str,
)
from .config import CodeScannerConfig
-log = logging.getLogger(__name__)
+log = get_logger(name=__name__, category="safety")
ALLOWED_CODE_SCANNER_MODEL_IDS = [
"code-scanner",
diff --git a/llama_stack/providers/inline/safety/llama_guard/llama_guard.py b/llama_stack/providers/inline/safety/llama_guard/llama_guard.py
index 5d52c5d89..5c7f30aa7 100644
--- a/llama_stack/providers/inline/safety/llama_guard/llama_guard.py
+++ b/llama_stack/providers/inline/safety/llama_guard/llama_guard.py
@@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-import logging
import re
import uuid
from string import Template
@@ -21,6 +20,7 @@ from llama_stack.apis.safety import (
from llama_stack.apis.safety.safety import ModerationObject, ModerationObjectResults
from llama_stack.apis.shields import Shield
from llama_stack.core.datatypes import Api
+from llama_stack.log import get_logger
from llama_stack.models.llama.datatypes import Role
from llama_stack.models.llama.sku_types import CoreModelId
from llama_stack.providers.datatypes import ShieldsProtocolPrivate
@@ -132,6 +132,8 @@ Provide your safety assessment for ONLY THE LAST $agent_type message in the abov
PROMPT_TEMPLATE = Template(f"{PROMPT_TASK}{SAFETY_CATEGORIES}{PROMPT_CONVERSATION}{PROMPT_INSTRUCTIONS}")
+logger = get_logger(name=__name__, category="safety")
+
class LlamaGuardSafetyImpl(Safety, ShieldsProtocolPrivate):
def __init__(self, config: LlamaGuardConfig, deps) -> None:
@@ -407,7 +409,7 @@ class LlamaGuardShield:
unsafe_code_list = [code.strip() for code in unsafe_code.split(",")]
invalid_codes = [code for code in unsafe_code_list if code not in SAFETY_CODE_TO_CATEGORIES_MAP]
if invalid_codes:
- logging.warning(f"Invalid safety codes returned: {invalid_codes}")
+ logger.warning(f"Invalid safety codes returned: {invalid_codes}")
# just returning safe object, as we don't know what the invalid codes can map to
return ModerationObject(
id=f"modr-{uuid.uuid4()}",
diff --git a/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py b/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py
index c760f0fd1..6fb6c4407 100644
--- a/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py
+++ b/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py
@@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-import logging
from typing import Any
import torch
@@ -21,6 +20,7 @@ from llama_stack.apis.safety import (
from llama_stack.apis.safety.safety import ModerationObject
from llama_stack.apis.shields import Shield
from llama_stack.core.utils.model_utils import model_local_dir
+from llama_stack.log import get_logger
from llama_stack.providers.datatypes import ShieldsProtocolPrivate
from llama_stack.providers.utils.inference.prompt_adapter import (
interleaved_content_as_str,
@@ -28,7 +28,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
from .config import PromptGuardConfig, PromptGuardType
-log = logging.getLogger(__name__)
+log = get_logger(name=__name__, category="safety")
PROMPT_GUARD_MODEL = "Prompt-Guard-86M"
diff --git a/llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py b/llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py
index b74c3826e..c9358101d 100644
--- a/llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py
+++ b/llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py
@@ -7,7 +7,6 @@
import collections
import functools
import json
-import logging
import random
import re
import string
@@ -20,7 +19,9 @@ import nltk
from pythainlp.tokenize import sent_tokenize as sent_tokenize_thai
from pythainlp.tokenize import word_tokenize as word_tokenize_thai
-logger = logging.getLogger()
+from llama_stack.log import get_logger
+
+logger = get_logger(name=__name__, category="scoring")
WORD_LIST = [
"western",
diff --git a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py
index d99255c79..30710ec2a 100644
--- a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py
+++ b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py
@@ -4,13 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-import logging
import threading
from typing import Any
from opentelemetry import metrics, trace
-
-logger = logging.getLogger(__name__)
from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.metrics import MeterProvider
@@ -40,6 +37,7 @@ from llama_stack.apis.telemetry import (
UnstructuredLogEvent,
)
from llama_stack.core.datatypes import Api
+from llama_stack.log import get_logger
from llama_stack.providers.inline.telemetry.meta_reference.console_span_processor import (
ConsoleSpanProcessor,
)
@@ -61,6 +59,8 @@ _GLOBAL_STORAGE: dict[str, dict[str | int, Any]] = {
_global_lock = threading.Lock()
_TRACER_PROVIDER = None
+logger = get_logger(name=__name__, category="telemetry")
+
def is_tracing_enabled(tracer):
with tracer.start_as_current_span("check_tracing") as span:
diff --git a/llama_stack/providers/inline/tool_runtime/rag/memory.py b/llama_stack/providers/inline/tool_runtime/rag/memory.py
index 6a7c7885c..a1543457b 100644
--- a/llama_stack/providers/inline/tool_runtime/rag/memory.py
+++ b/llama_stack/providers/inline/tool_runtime/rag/memory.py
@@ -5,7 +5,6 @@
# the root directory of this source tree.
import asyncio
-import logging
import secrets
import string
from typing import Any
@@ -32,6 +31,7 @@ from llama_stack.apis.tools import (
ToolRuntime,
)
from llama_stack.apis.vector_io import QueryChunksResponse, VectorIO
+from llama_stack.log import get_logger
from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
from llama_stack.providers.utils.memory.vector_store import (
@@ -42,7 +42,7 @@ from llama_stack.providers.utils.memory.vector_store import (
from .config import RagToolRuntimeConfig
from .context_retriever import generate_rag_query
-log = logging.getLogger(__name__)
+log = get_logger(name=__name__, category="tool_runtime")
def make_random_string(length: int = 8):
diff --git a/llama_stack/providers/inline/vector_io/faiss/faiss.py b/llama_stack/providers/inline/vector_io/faiss/faiss.py
index af61da59b..258c6e7aa 100644
--- a/llama_stack/providers/inline/vector_io/faiss/faiss.py
+++ b/llama_stack/providers/inline/vector_io/faiss/faiss.py
@@ -8,7 +8,6 @@ import asyncio
import base64
import io
import json
-import logging
from typing import Any
import faiss
@@ -24,6 +23,7 @@ from llama_stack.apis.vector_io import (
QueryChunksResponse,
VectorIO,
)
+from llama_stack.log import get_logger
from llama_stack.providers.datatypes import (
HealthResponse,
HealthStatus,
@@ -40,7 +40,7 @@ from llama_stack.providers.utils.memory.vector_store import (
from .config import FaissVectorIOConfig
-logger = logging.getLogger(__name__)
+logger = get_logger(name=__name__, category="vector_io")
VERSION = "v3"
VECTOR_DBS_PREFIX = f"vector_dbs:{VERSION}::"
diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
index cc1982f3b..7cf163960 100644
--- a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
+++ b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
@@ -5,7 +5,6 @@
# the root directory of this source tree.
import asyncio
-import logging
import re
import sqlite3
import struct
@@ -24,6 +23,7 @@ from llama_stack.apis.vector_io import (
QueryChunksResponse,
VectorIO,
)
+from llama_stack.log import get_logger
from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.api import KVStore
@@ -36,7 +36,7 @@ from llama_stack.providers.utils.memory.vector_store import (
VectorDBWithIndex,
)
-logger = logging.getLogger(__name__)
+logger = get_logger(name=__name__, category="vector_io")
# Specifying search mode is dependent on the VectorIO provider.
VECTOR_SEARCH = "vector"
diff --git a/llama_stack/providers/remote/inference/llama_openai_compat/llama.py b/llama_stack/providers/remote/inference/llama_openai_compat/llama.py
index 4857c6723..cfcfcbf90 100644
--- a/llama_stack/providers/remote/inference/llama_openai_compat/llama.py
+++ b/llama_stack/providers/remote/inference/llama_openai_compat/llama.py
@@ -3,15 +3,14 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-import logging
-
+from llama_stack.log import get_logger
from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig
from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
from .models import MODEL_ENTRIES
-logger = logging.getLogger(__name__)
+logger = get_logger(name=__name__, category="inference")
class LlamaCompatInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
diff --git a/llama_stack/providers/remote/inference/nvidia/NVIDIA.md b/llama_stack/providers/remote/inference/nvidia/NVIDIA.md
index 4a072215c..35d26fd0b 100644
--- a/llama_stack/providers/remote/inference/nvidia/NVIDIA.md
+++ b/llama_stack/providers/remote/inference/nvidia/NVIDIA.md
@@ -77,6 +77,10 @@ print(f"Response: {response.completion_message.content}")
```
### Create Embeddings
+> Note on OpenAI embeddings compatibility
+>
+> NVIDIA asymmetric embedding models (e.g., `nvidia/llama-3.2-nv-embedqa-1b-v2`) require an `input_type` parameter not present in the standard OpenAI embeddings API. The NVIDIA Inference Adapter automatically sets `input_type="query"` when using the OpenAI-compatible embeddings endpoint for NVIDIA. For passage embeddings, use the `embeddings` API with `task_type="document"`.
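+>
+> For passage embeddings, a call might look like this (a sketch; it assumes the `embeddings` API shown below and the `task_type` parameter described above):
+>
+> ```python
+> response = client.inference.embeddings(
+>     model_id="nvidia/llama-3.2-nv-embedqa-1b-v2",
+>     contents=["Paris is the capital of France."],
+>     task_type="document",
+> )
+> ```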
+
```python
response = client.inference.embeddings(
model_id="nvidia/llama-3.2-nv-embedqa-1b-v2",
diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py
index 7bc3fd0c9..7052cfb57 100644
--- a/llama_stack/providers/remote/inference/nvidia/nvidia.py
+++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py
@@ -4,11 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-import logging
import warnings
from collections.abc import AsyncIterator
-from openai import APIConnectionError, BadRequestError
+from openai import NOT_GIVEN, APIConnectionError, BadRequestError
from llama_stack.apis.common.content_types import (
InterleavedContent,
@@ -27,12 +26,16 @@ from llama_stack.apis.inference import (
Inference,
LogProbConfig,
Message,
+ OpenAIEmbeddingData,
+ OpenAIEmbeddingsResponse,
+ OpenAIEmbeddingUsage,
ResponseFormat,
SamplingParams,
TextTruncation,
ToolChoice,
ToolConfig,
)
+from llama_stack.log import get_logger
from llama_stack.models.llama.datatypes import ToolDefinition, ToolPromptFormat
from llama_stack.providers.utils.inference.model_registry import (
ModelRegistryHelper,
@@ -54,7 +57,7 @@ from .openai_utils import (
)
from .utils import _is_nvidia_hosted
-logger = logging.getLogger(__name__)
+logger = get_logger(name=__name__, category="inference")
class NVIDIAInferenceAdapter(OpenAIMixin, Inference, ModelRegistryHelper):
@@ -210,6 +213,57 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference, ModelRegistryHelper):
#
return EmbeddingsResponse(embeddings=[embedding.embedding for embedding in response.data])
+ async def openai_embeddings(
+ self,
+ model: str,
+ input: str | list[str],
+ encoding_format: str | None = "float",
+ dimensions: int | None = None,
+ user: str | None = None,
+ ) -> OpenAIEmbeddingsResponse:
+ """
+ OpenAI-compatible embeddings for NVIDIA NIM.
+
+ Note: NVIDIA NIM asymmetric embedding models require an "input_type" field not present in the standard OpenAI embeddings API.
+ We default this to "query" to ensure requests succeed when using the
+ OpenAI-compatible endpoint. For passage embeddings, use the embeddings API with
+ `task_type='document'`.
+ """
+ extra_body: dict[str, object] = {"input_type": "query"}
+ logger.warning(
+ "NVIDIA OpenAI-compatible embeddings: defaulting to input_type='query'. "
+ "For passage embeddings, use the embeddings API with task_type='document'."
+ )
+
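+        # Map None to the OpenAI SDK's NOT_GIVEN sentinel so optional fields are omitted from the request.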
+ response = await self.client.embeddings.create(
+ model=await self._get_provider_model_id(model),
+ input=input,
+ encoding_format=encoding_format if encoding_format is not None else NOT_GIVEN,
+ dimensions=dimensions if dimensions is not None else NOT_GIVEN,
+ user=user if user is not None else NOT_GIVEN,
+ extra_body=extra_body,
+ )
+
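+        # Re-wrap the response embeddings as OpenAI-compatible objects, indexed by position.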
+ data = []
+ for i, embedding_data in enumerate(response.data):
+ data.append(
+ OpenAIEmbeddingData(
+ embedding=embedding_data.embedding,
+ index=i,
+ )
+ )
+
+ usage = OpenAIEmbeddingUsage(
+ prompt_tokens=response.usage.prompt_tokens,
+ total_tokens=response.usage.total_tokens,
+ )
+
+ return OpenAIEmbeddingsResponse(
+ data=data,
+ model=response.model,
+ usage=usage,
+ )
+
async def chat_completion(
self,
model_id: str,
diff --git a/llama_stack/providers/remote/inference/nvidia/utils.py b/llama_stack/providers/remote/inference/nvidia/utils.py
index 74019999e..790bbafd1 100644
--- a/llama_stack/providers/remote/inference/nvidia/utils.py
+++ b/llama_stack/providers/remote/inference/nvidia/utils.py
@@ -4,13 +4,13 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-import logging
-
import httpx
+from llama_stack.log import get_logger
+
from . import NVIDIAConfig
-logger = logging.getLogger(__name__)
+logger = get_logger(name=__name__, category="inference")
def _is_nvidia_hosted(config: NVIDIAConfig) -> bool:
diff --git a/llama_stack/providers/remote/inference/openai/openai.py b/llama_stack/providers/remote/inference/openai/openai.py
index 865258559..1c72fa0bc 100644
--- a/llama_stack/providers/remote/inference/openai/openai.py
+++ b/llama_stack/providers/remote/inference/openai/openai.py
@@ -4,15 +4,14 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-import logging
-
+from llama_stack.log import get_logger
from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
from .config import OpenAIConfig
from .models import MODEL_ENTRIES
-logger = logging.getLogger(__name__)
+logger = get_logger(name=__name__, category="inference")
#
diff --git a/llama_stack/providers/remote/inference/tgi/tgi.py b/llama_stack/providers/remote/inference/tgi/tgi.py
index 323831845..9da961438 100644
--- a/llama_stack/providers/remote/inference/tgi/tgi.py
+++ b/llama_stack/providers/remote/inference/tgi/tgi.py
@@ -5,7 +5,6 @@
# the root directory of this source tree.
-import logging
from collections.abc import AsyncGenerator
from huggingface_hub import AsyncInferenceClient, HfApi
@@ -34,6 +33,7 @@ from llama_stack.apis.inference import (
ToolPromptFormat,
)
from llama_stack.apis.models import Model
+from llama_stack.log import get_logger
from llama_stack.models.llama.sku_list import all_registered_models
from llama_stack.providers.datatypes import ModelsProtocolPrivate
from llama_stack.providers.utils.inference.model_registry import (
@@ -58,7 +58,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
from .config import InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImplConfig
-log = logging.getLogger(__name__)
+log = get_logger(name=__name__, category="inference")
def build_hf_repo_model_entries():
diff --git a/llama_stack/providers/remote/post_training/nvidia/utils.py b/llama_stack/providers/remote/post_training/nvidia/utils.py
index d6e1016b2..9a6c3b53c 100644
--- a/llama_stack/providers/remote/post_training/nvidia/utils.py
+++ b/llama_stack/providers/remote/post_training/nvidia/utils.py
@@ -4,18 +4,18 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-import logging
import warnings
from typing import Any
from pydantic import BaseModel
from llama_stack.apis.post_training import TrainingConfig
+from llama_stack.log import get_logger
from llama_stack.providers.remote.post_training.nvidia.config import SFTLoRADefaultConfig
from .config import NvidiaPostTrainingConfig
-logger = logging.getLogger(__name__)
+logger = get_logger(name=__name__, category="integration")
def warn_unsupported_params(config_dict: Any, supported_keys: set[str], config_name: str) -> None:
diff --git a/llama_stack/providers/remote/safety/bedrock/bedrock.py b/llama_stack/providers/remote/safety/bedrock/bedrock.py
index 1895e7507..1ca87ae3d 100644
--- a/llama_stack/providers/remote/safety/bedrock/bedrock.py
+++ b/llama_stack/providers/remote/safety/bedrock/bedrock.py
@@ -5,7 +5,6 @@
# the root directory of this source tree.
import json
-import logging
from typing import Any
from llama_stack.apis.inference import Message
@@ -16,12 +15,13 @@ from llama_stack.apis.safety import (
ViolationLevel,
)
from llama_stack.apis.shields import Shield
+from llama_stack.log import get_logger
from llama_stack.providers.datatypes import ShieldsProtocolPrivate
from llama_stack.providers.utils.bedrock.client import create_bedrock_client
from .config import BedrockSafetyConfig
-logger = logging.getLogger(__name__)
+logger = get_logger(name=__name__, category="safety")
class BedrockSafetyAdapter(Safety, ShieldsProtocolPrivate):
diff --git a/llama_stack/providers/remote/safety/nvidia/nvidia.py b/llama_stack/providers/remote/safety/nvidia/nvidia.py
index 7f17b1cb6..0d8d8ba7a 100644
--- a/llama_stack/providers/remote/safety/nvidia/nvidia.py
+++ b/llama_stack/providers/remote/safety/nvidia/nvidia.py
@@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-import logging
from typing import Any
import requests
@@ -12,12 +11,13 @@ import requests
from llama_stack.apis.inference import Message
from llama_stack.apis.safety import RunShieldResponse, Safety, SafetyViolation, ViolationLevel
from llama_stack.apis.shields import Shield
+from llama_stack.log import get_logger
from llama_stack.providers.datatypes import ShieldsProtocolPrivate
from llama_stack.providers.utils.inference.openai_compat import convert_message_to_openai_dict_new
from .config import NVIDIASafetyConfig
-logger = logging.getLogger(__name__)
+logger = get_logger(name=__name__, category="safety")
class NVIDIASafetyAdapter(Safety, ShieldsProtocolPrivate):
diff --git a/llama_stack/providers/remote/safety/sambanova/sambanova.py b/llama_stack/providers/remote/safety/sambanova/sambanova.py
index 6c7190afe..676ee7185 100644
--- a/llama_stack/providers/remote/safety/sambanova/sambanova.py
+++ b/llama_stack/providers/remote/safety/sambanova/sambanova.py
@@ -5,7 +5,6 @@
# the root directory of this source tree.
import json
-import logging
from typing import Any
import litellm
@@ -20,12 +19,13 @@ from llama_stack.apis.safety import (
)
from llama_stack.apis.shields import Shield
from llama_stack.core.request_headers import NeedsRequestProviderData
+from llama_stack.log import get_logger
from llama_stack.providers.datatypes import ShieldsProtocolPrivate
from llama_stack.providers.utils.inference.openai_compat import convert_message_to_openai_dict_new
from .config import SambaNovaSafetyConfig
-logger = logging.getLogger(__name__)
+logger = get_logger(name=__name__, category="safety")
CANNED_RESPONSE_TEXT = "I can't answer that. Can I help with something else?"
diff --git a/llama_stack/providers/remote/vector_io/chroma/chroma.py b/llama_stack/providers/remote/vector_io/chroma/chroma.py
index 8f252711b..0047e6055 100644
--- a/llama_stack/providers/remote/vector_io/chroma/chroma.py
+++ b/llama_stack/providers/remote/vector_io/chroma/chroma.py
@@ -5,7 +5,6 @@
# the root directory of this source tree.
import asyncio
import json
-import logging
from typing import Any
from urllib.parse import urlparse
@@ -20,6 +19,7 @@ from llama_stack.apis.vector_io import (
QueryChunksResponse,
VectorIO,
)
+from llama_stack.log import get_logger
from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig
from llama_stack.providers.utils.kvstore import kvstore_impl
@@ -33,7 +33,7 @@ from llama_stack.providers.utils.memory.vector_store import (
from .config import ChromaVectorIOConfig as RemoteChromaVectorIOConfig
-log = logging.getLogger(__name__)
+log = get_logger(name=__name__, category="vector_io")
ChromaClientType = chromadb.api.AsyncClientAPI | chromadb.api.ClientAPI
diff --git a/llama_stack/providers/remote/vector_io/milvus/milvus.py b/llama_stack/providers/remote/vector_io/milvus/milvus.py
index 0eaae81b3..034ec331c 100644
--- a/llama_stack/providers/remote/vector_io/milvus/milvus.py
+++ b/llama_stack/providers/remote/vector_io/milvus/milvus.py
@@ -5,7 +5,6 @@
# the root directory of this source tree.
import asyncio
-import logging
import os
from typing import Any
@@ -21,6 +20,7 @@ from llama_stack.apis.vector_io import (
QueryChunksResponse,
VectorIO,
)
+from llama_stack.log import get_logger
from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
from llama_stack.providers.inline.vector_io.milvus import MilvusVectorIOConfig as InlineMilvusVectorIOConfig
from llama_stack.providers.utils.kvstore import kvstore_impl
@@ -36,7 +36,7 @@ from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collecti
from .config import MilvusVectorIOConfig as RemoteMilvusVectorIOConfig
-logger = logging.getLogger(__name__)
+logger = get_logger(name=__name__, category="vector_io")
VERSION = "v3"
VECTOR_DBS_PREFIX = f"vector_dbs:milvus:{VERSION}::"
@@ -413,15 +413,6 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
index = await self._get_and_cache_vector_db_index(vector_db_id)
if not index:
raise VectorStoreNotFoundError(vector_db_id)
-
- if params and params.get("mode") == "keyword":
- # Check if this is inline Milvus (Milvus-Lite)
- if hasattr(self.config, "db_path"):
- raise NotImplementedError(
- "Keyword search is not supported in Milvus-Lite. "
- "Please use a remote Milvus server for keyword search functionality."
- )
-
return await index.query_chunks(query, params)
async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
diff --git a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py
index d2a5d910b..e829c9e72 100644
--- a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py
+++ b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py
@@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-import logging
from typing import Any
import psycopg2
@@ -22,6 +21,7 @@ from llama_stack.apis.vector_io import (
QueryChunksResponse,
VectorIO,
)
+from llama_stack.log import get_logger
from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.api import KVStore
@@ -34,7 +34,7 @@ from llama_stack.providers.utils.memory.vector_store import (
from .config import PGVectorVectorIOConfig
-log = logging.getLogger(__name__)
+log = get_logger(name=__name__, category="vector_io")
VERSION = "v3"
VECTOR_DBS_PREFIX = f"vector_dbs:pgvector:{VERSION}::"
diff --git a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
index 018015780..8499ff997 100644
--- a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
+++ b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
@@ -5,7 +5,6 @@
# the root directory of this source tree.
import asyncio
-import logging
import uuid
from typing import Any
@@ -24,6 +23,7 @@ from llama_stack.apis.vector_io import (
VectorStoreChunkingStrategy,
VectorStoreFileObject,
)
+from llama_stack.log import get_logger
from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig
from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl
@@ -36,7 +36,7 @@ from llama_stack.providers.utils.memory.vector_store import (
from .config import QdrantVectorIOConfig as RemoteQdrantVectorIOConfig
-log = logging.getLogger(__name__)
+log = get_logger(name=__name__, category="vector_io")
CHUNK_ID_KEY = "_chunk_id"
# KV store prefixes for vector databases
diff --git a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py
index 966724848..ddf95317b 100644
--- a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py
+++ b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py
@@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import json
-import logging
from typing import Any
import weaviate
@@ -19,6 +18,7 @@ from llama_stack.apis.files.files import Files
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
from llama_stack.core.request_headers import NeedsRequestProviderData
+from llama_stack.log import get_logger
from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.api import KVStore
@@ -34,7 +34,7 @@ from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collecti
from .config import WeaviateVectorIOConfig
-log = logging.getLogger(__name__)
+log = get_logger(name=__name__, category="vector_io")
VERSION = "v3"
VECTOR_DBS_PREFIX = f"vector_dbs:weaviate:{VERSION}::"
diff --git a/llama_stack/providers/utils/inference/embedding_mixin.py b/llama_stack/providers/utils/inference/embedding_mixin.py
index 32e89f987..05886cdc8 100644
--- a/llama_stack/providers/utils/inference/embedding_mixin.py
+++ b/llama_stack/providers/utils/inference/embedding_mixin.py
@@ -5,10 +5,11 @@
# the root directory of this source tree.
import base64
-import logging
import struct
from typing import TYPE_CHECKING
+from llama_stack.log import get_logger
+
if TYPE_CHECKING:
from sentence_transformers import SentenceTransformer
@@ -27,7 +28,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import interleaved_con
EMBEDDING_MODELS = {}
-log = logging.getLogger(__name__)
+log = get_logger(name=__name__, category="inference")
class SentenceTransformerEmbeddingMixin:
diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/llama_stack/providers/utils/inference/openai_compat.py
index 5e6c26884..eb32d2de9 100644
--- a/llama_stack/providers/utils/inference/openai_compat.py
+++ b/llama_stack/providers/utils/inference/openai_compat.py
@@ -5,7 +5,6 @@
# the root directory of this source tree.
import base64
import json
-import logging
import struct
import time
import uuid
@@ -122,6 +121,7 @@ from llama_stack.apis.inference import (
from llama_stack.apis.inference import (
OpenAIChoice as OpenAIChatCompletionChoice,
)
+from llama_stack.log import get_logger
from llama_stack.models.llama.datatypes import (
BuiltinTool,
StopReason,
@@ -134,7 +134,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
decode_assistant_message,
)
-logger = logging.getLogger(__name__)
+logger = get_logger(name=__name__, category="inference")
class OpenAICompatCompletionChoiceDelta(BaseModel):
diff --git a/llama_stack/providers/utils/kvstore/mongodb/mongodb.py b/llama_stack/providers/utils/kvstore/mongodb/mongodb.py
index 3842773d9..af52f3708 100644
--- a/llama_stack/providers/utils/kvstore/mongodb/mongodb.py
+++ b/llama_stack/providers/utils/kvstore/mongodb/mongodb.py
@@ -4,16 +4,16 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-import logging
from datetime import datetime
from pymongo import AsyncMongoClient
+from llama_stack.log import get_logger
from llama_stack.providers.utils.kvstore import KVStore
from ..config import MongoDBKVStoreConfig
-log = logging.getLogger(__name__)
+log = get_logger(name=__name__, category="kvstore")
class MongoDBKVStoreImpl(KVStore):
diff --git a/llama_stack/providers/utils/kvstore/postgres/postgres.py b/llama_stack/providers/utils/kvstore/postgres/postgres.py
index cabb4c512..021e90774 100644
--- a/llama_stack/providers/utils/kvstore/postgres/postgres.py
+++ b/llama_stack/providers/utils/kvstore/postgres/postgres.py
@@ -4,16 +4,17 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-import logging
from datetime import datetime
import psycopg2
from psycopg2.extras import DictCursor
+from llama_stack.log import get_logger
+
from ..api import KVStore
from ..config import PostgresKVStoreConfig
-log = logging.getLogger(__name__)
+log = get_logger(name=__name__, category="kvstore")
class PostgresKVStoreImpl(KVStore):
diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
index 120d0d4fc..0775b31d1 100644
--- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
+++ b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
@@ -44,7 +44,7 @@ from llama_stack.providers.utils.memory.vector_store import (
make_overlapped_chunks,
)
-logger = get_logger(__name__, category="vector_io")
+logger = get_logger(name=__name__, category="memory")
# Constants for OpenAI vector stores
CHUNK_MULTIPLIER = 5
diff --git a/llama_stack/providers/utils/memory/vector_store.py b/llama_stack/providers/utils/memory/vector_store.py
index 6ae5bb521..b5d82432d 100644
--- a/llama_stack/providers/utils/memory/vector_store.py
+++ b/llama_stack/providers/utils/memory/vector_store.py
@@ -5,7 +5,6 @@
# the root directory of this source tree.
import base64
import io
-import logging
import re
import time
from abc import ABC, abstractmethod
@@ -26,6 +25,7 @@ from llama_stack.apis.common.content_types import (
from llama_stack.apis.tools import RAGDocument
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import Chunk, ChunkMetadata, QueryChunksResponse
+from llama_stack.log import get_logger
from llama_stack.models.llama.llama3.tokenizer import Tokenizer
from llama_stack.providers.datatypes import Api
from llama_stack.providers.utils.inference.prompt_adapter import (
@@ -33,7 +33,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
)
from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
-log = logging.getLogger(__name__)
+log = get_logger(name=__name__, category="memory")
class ChunkForDeletion(BaseModel):
diff --git a/llama_stack/providers/utils/telemetry/tracing.py b/llama_stack/providers/utils/telemetry/tracing.py
index 7080e774a..7694003b5 100644
--- a/llama_stack/providers/utils/telemetry/tracing.py
+++ b/llama_stack/providers/utils/telemetry/tracing.py
@@ -6,7 +6,7 @@
import asyncio
import contextvars
-import logging
+import logging # allow-direct-logging
import queue
import random
import sys
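# tracing.py is the one module that intentionally keeps a direct import of
# logging - presumably because it hooks into logging internals rather than
# just emitting messages - and the inline `# allow-direct-logging` marker is
# what exempts it from the new check-log-usage pre-commit hook.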
diff --git a/llama_stack/ui/app/chat-playground/page.test.tsx b/llama_stack/ui/app/chat-playground/page.test.tsx
new file mode 100644
index 000000000..983225f00
--- /dev/null
+++ b/llama_stack/ui/app/chat-playground/page.test.tsx
@@ -0,0 +1,587 @@
+import React from "react";
+import {
+ render,
+ screen,
+ fireEvent,
+ waitFor,
+ act,
+} from "@testing-library/react";
+import "@testing-library/jest-dom";
+import ChatPlaygroundPage from "./page";
+
+const mockClient = {
+ agents: {
+ list: jest.fn(),
+ create: jest.fn(),
+ retrieve: jest.fn(),
+ delete: jest.fn(),
+ session: {
+ list: jest.fn(),
+ create: jest.fn(),
+ delete: jest.fn(),
+ retrieve: jest.fn(),
+ },
+ turn: {
+ create: jest.fn(),
+ },
+ },
+ models: {
+ list: jest.fn(),
+ },
+ toolgroups: {
+ list: jest.fn(),
+ },
+};
+
+jest.mock("@/hooks/use-auth-client", () => ({
+ useAuthClient: jest.fn(() => mockClient),
+}));
+
+jest.mock("@/components/chat-playground/chat", () => ({
+ Chat: jest.fn(
+ ({
+ className,
+ messages,
+ handleSubmit,
+ input,
+ handleInputChange,
+ isGenerating,
+ append,
+ suggestions,
+ }) => (
+ <div className={className}>
+ <div>{messages.length}</div>
+ <input value={input} onChange={handleInputChange} disabled={isGenerating} />
+ <button onClick={handleSubmit}>Submit</button>
+ {suggestions?.map((suggestion: string, index: number) => (
+ <button
+ key={index}
+ onClick={() => append({ role: "user", content: suggestion })}
+ >
+ {suggestion}
+ </button>
+ ))}
+ </div>
+ )
+ ),
+}));
+
+jest.mock("@/components/chat-playground/session-manager", () => ({
+ SessionManager: jest.fn(({ selectedAgentId, onNewSession }) => (
+ <div>
+ {selectedAgentId && (
+ <>
+ <div>{selectedAgentId}</div>
+ <button onClick={onNewSession}>New Session</button>
+ </>
+ )}
+ </div>
+ )),
+ SessionUtils: {
+ saveCurrentSessionId: jest.fn(),
+ loadCurrentSessionId: jest.fn(),
+ loadCurrentAgentId: jest.fn(),
+ saveCurrentAgentId: jest.fn(),
+ clearCurrentSession: jest.fn(),
+ saveSessionData: jest.fn(),
+ loadSessionData: jest.fn(),
+ saveAgentConfig: jest.fn(),
+ loadAgentConfig: jest.fn(),
+ clearAgentCache: jest.fn(),
+ createDefaultSession: jest.fn(() => ({
+ id: "test-session-123",
+ name: "Default Session",
+ messages: [],
+ selectedModel: "",
+ systemMessage: "You are a helpful assistant.",
+ agentId: "test-agent-123",
+ createdAt: Date.now(),
+ updatedAt: Date.now(),
+ })),
+ },
+}));
+
+const mockAgents = [
+ {
+ agent_id: "agent_123",
+ agent_config: {
+ name: "Test Agent",
+ instructions: "You are a test assistant.",
+ },
+ },
+ {
+ agent_id: "agent_456",
+ agent_config: {
+ agent_name: "Another Agent",
+ instructions: "You are another assistant.",
+ },
+ },
+];
+
+const mockModels = [
+ {
+ identifier: "test-model-1",
+ model_type: "llm",
+ },
+ {
+ identifier: "test-model-2",
+ model_type: "llm",
+ },
+];
+
+const mockToolgroups = [
+ {
+ identifier: "builtin::rag",
+ provider_id: "test-provider",
+ type: "tool_group",
+ provider_resource_id: "test-resource",
+ },
+];
+
+describe("ChatPlaygroundPage", () => {
+ beforeEach(() => {
+ jest.clearAllMocks();
+ Element.prototype.scrollIntoView = jest.fn();
+ mockClient.agents.list.mockResolvedValue({ data: mockAgents });
+ mockClient.models.list.mockResolvedValue(mockModels);
+ mockClient.toolgroups.list.mockResolvedValue(mockToolgroups);
+ mockClient.agents.session.create.mockResolvedValue({
+ session_id: "new-session-123",
+ });
+ mockClient.agents.session.list.mockResolvedValue({ data: [] });
+ mockClient.agents.session.retrieve.mockResolvedValue({
+ session_id: "test-session",
+ session_name: "Test Session",
+ started_at: new Date().toISOString(),
+ turns: [],
+ }); // No turns by default
+ mockClient.agents.retrieve.mockResolvedValue({
+ agent_id: "test-agent",
+ agent_config: {
+ toolgroups: ["builtin::rag"],
+ instructions: "Test instructions",
+ model: "test-model",
+ },
+ });
+ mockClient.agents.delete.mockResolvedValue(undefined);
+ });
+
+ describe("Agent Selector Rendering", () => {
+ test("shows agent selector when agents are available", async () => {
+ await act(async () => {
+ render(<ChatPlaygroundPage />);
+ });
+
+ await waitFor(() => {
+ expect(screen.getByText("Agent Session:")).toBeInTheDocument();
+ expect(screen.getAllByRole("combobox")).toHaveLength(2);
+ expect(screen.getByText("+ New Agent")).toBeInTheDocument();
+ expect(screen.getByText("Clear Chat")).toBeInTheDocument();
+ });
+ });
+
+ test("does not show agent selector when no agents are available", async () => {
+ mockClient.agents.list.mockResolvedValue({ data: [] });
+
+ await act(async () => {
+ render(<ChatPlaygroundPage />);
+ });
+
+ await waitFor(() => {
+ expect(screen.queryByText("Agent Session:")).not.toBeInTheDocument();
+ expect(screen.getAllByRole("combobox")).toHaveLength(1);
+ expect(screen.getByText("+ New Agent")).toBeInTheDocument();
+ expect(screen.queryByText("Clear Chat")).not.toBeInTheDocument();
+ });
+ });
+
+ test("does not show agent selector while loading", async () => {
+ mockClient.agents.list.mockImplementation(() => new Promise(() => {}));
+
+ await act(async () => {
+ render(<ChatPlaygroundPage />);
+ });
+
+ expect(screen.queryByText("Agent Session:")).not.toBeInTheDocument();
+ expect(screen.getAllByRole("combobox")).toHaveLength(1);
+ expect(screen.getByText("+ New Agent")).toBeInTheDocument();
+ expect(screen.queryByText("Clear Chat")).not.toBeInTheDocument();
+ });
+
+ test("shows agent options in selector", async () => {
+ await act(async () => {
+ render(<ChatPlaygroundPage />);
+ });
+
+ await waitFor(() => {
+ const agentCombobox = screen.getAllByRole("combobox").find(element => {
+ return (
+ element.textContent?.includes("Test Agent") ||
+ element.textContent?.includes("Select Agent")
+ );
+ });
+ expect(agentCombobox).toBeDefined();
+ fireEvent.click(agentCombobox!);
+ });
+
+ await waitFor(() => {
+ expect(screen.getAllByText("Test Agent")).toHaveLength(2);
+ expect(screen.getByText("Another Agent")).toBeInTheDocument();
+ });
+ });
+
+ test("displays agent ID when no name is available", async () => {
+ const agentWithoutName = {
+ agent_id: "agent_789",
+ agent_config: {
+ instructions: "You are an agent without a name.",
+ },
+ };
+
+ mockClient.agents.list.mockResolvedValue({ data: [agentWithoutName] });
+
+ await act(async () => {
+ render(<ChatPlaygroundPage />);
+ });
+
+ await waitFor(() => {
+ const agentCombobox = screen.getAllByRole("combobox").find(element => {
+ return (
+ element.textContent?.includes("Agent agent_78") ||
+ element.textContent?.includes("Select Agent")
+ );
+ });
+ expect(agentCombobox).toBeDefined();
+ fireEvent.click(agentCombobox!);
+ });
+
+ await waitFor(() => {
+ expect(screen.getAllByText("Agent agent_78...")).toHaveLength(2);
+ });
+ });
+ });
+
+ describe("Agent Creation Modal", () => {
+ test("opens agent creation modal when + New Agent is clicked", async () => {
+ await act(async () => {
+ render(<ChatPlaygroundPage />);
+ });
+
+ const newAgentButton = screen.getByText("+ New Agent");
+ fireEvent.click(newAgentButton);
+
+ expect(screen.getByText("Create New Agent")).toBeInTheDocument();
+ expect(screen.getByText("Agent Name (optional)")).toBeInTheDocument();
+ expect(screen.getAllByText("Model")).toHaveLength(2);
+ expect(screen.getByText("System Instructions")).toBeInTheDocument();
+ expect(screen.getByText("Tools (optional)")).toBeInTheDocument();
+ });
+
+ test("closes modal when Cancel is clicked", async () => {
+ await act(async () => {
+ render(<ChatPlaygroundPage />);
+ });
+
+ const newAgentButton = screen.getByText("+ New Agent");
+ fireEvent.click(newAgentButton);
+
+ const cancelButton = screen.getByText("Cancel");
+ fireEvent.click(cancelButton);
+
+ expect(screen.queryByText("Create New Agent")).not.toBeInTheDocument();
+ });
+
+ test("creates agent when Create Agent is clicked", async () => {
+ mockClient.agents.create.mockResolvedValue({ agent_id: "new-agent-123" });
+ mockClient.agents.list
+ .mockResolvedValueOnce({ data: mockAgents })
+ .mockResolvedValueOnce({
+ data: [
+ ...mockAgents,
+ { agent_id: "new-agent-123", agent_config: { name: "New Agent" } },
+ ],
+ });
+
+ await act(async () => {
+ render(<ChatPlaygroundPage />);
+ });
+
+ const newAgentButton = screen.getByText("+ New Agent");
+ await act(async () => {
+ fireEvent.click(newAgentButton);
+ });
+
+ await waitFor(() => {
+ expect(screen.getByText("Create New Agent")).toBeInTheDocument();
+ });
+
+ const nameInput = screen.getByPlaceholderText("My Custom Agent");
+ await act(async () => {
+ fireEvent.change(nameInput, { target: { value: "Test Agent Name" } });
+ });
+
+ const instructionsTextarea = screen.getByDisplayValue(
+ "You are a helpful assistant."
+ );
+ await act(async () => {
+ fireEvent.change(instructionsTextarea, {
+ target: { value: "Custom instructions" },
+ });
+ });
+
+ await waitFor(() => {
+ const modalModelSelectors = screen
+ .getAllByRole("combobox")
+ .filter(el => {
+ return (
+ el.textContent?.includes("Select Model") ||
+ el.closest('[class*="modal"]') ||
+ el.closest('[class*="card"]')
+ );
+ });
+ expect(modalModelSelectors.length).toBeGreaterThan(0);
+ });
+
+ const modalModelSelectors = screen.getAllByRole("combobox").filter(el => {
+ return (
+ el.textContent?.includes("Select Model") ||
+ el.closest('[class*="modal"]') ||
+ el.closest('[class*="card"]')
+ );
+ });
+
+ await act(async () => {
+ fireEvent.click(modalModelSelectors[0]);
+ });
+
+ await waitFor(() => {
+ const modelOptions = screen.getAllByText("test-model-1");
+ expect(modelOptions.length).toBeGreaterThan(0);
+ });
+
+ const modelOptions = screen.getAllByText("test-model-1");
+ const dropdownOption = modelOptions.find(
+ option =>
+ option.closest('[role="option"]') ||
+ option.id?.includes("radix") ||
+ option.getAttribute("aria-selected") !== null
+ );
+
+ await act(async () => {
+ fireEvent.click(
+ dropdownOption || modelOptions[modelOptions.length - 1]
+ );
+ });
+
+ await waitFor(() => {
+ const createButton = screen.getByText("Create Agent");
+ expect(createButton).not.toBeDisabled();
+ });
+
+ const createButton = screen.getByText("Create Agent");
+ await act(async () => {
+ fireEvent.click(createButton);
+ });
+
+ await waitFor(() => {
+ expect(mockClient.agents.create).toHaveBeenCalledWith({
+ agent_config: {
+ model: expect.any(String),
+ instructions: "Custom instructions",
+ name: "Test Agent Name",
+ enable_session_persistence: true,
+ },
+ });
+ });
+
+ await waitFor(() => {
+ expect(screen.queryByText("Create New Agent")).not.toBeInTheDocument();
+ });
+ });
+ });
+
+ describe("Agent Selection", () => {
+ test("creates default session when agent is selected", async () => {
+ await act(async () => {
+ render(<ChatPlaygroundPage />);
+ });
+
+ await waitFor(() => {
+ // first agent should be auto-selected
+ expect(mockClient.agents.session.create).toHaveBeenCalledWith(
+ "agent_123",
+ { session_name: "Default Session" }
+ );
+ });
+ });
+
+ test("switches agent when different agent is selected", async () => {
+ await act(async () => {
+ render(<ChatPlaygroundPage />);
+ });
+
+ await waitFor(() => {
+ const agentCombobox = screen.getAllByRole("combobox").find(element => {
+ return (
+ element.textContent?.includes("Test Agent") ||
+ element.textContent?.includes("Select Agent")
+ );
+ });
+ expect(agentCombobox).toBeDefined();
+ fireEvent.click(agentCombobox!);
+ });
+
+ await waitFor(() => {
+ const anotherAgentOption = screen.getByText("Another Agent");
+ fireEvent.click(anotherAgentOption);
+ });
+
+ expect(mockClient.agents.session.create).toHaveBeenCalledWith(
+ "agent_456",
+ { session_name: "Default Session" }
+ );
+ });
+ });
+
+ describe("Agent Deletion", () => {
+ test("shows delete button when multiple agents exist", async () => {
+ await act(async () => {
+ render(<ChatPlaygroundPage />);
+ });
+
+ await waitFor(() => {
+ expect(screen.getByTitle("Delete current agent")).toBeInTheDocument();
+ });
+ });
+
+ test("hides delete button when only one agent exists", async () => {
+ mockClient.agents.list.mockResolvedValue({
+ data: [mockAgents[0]],
+ });
+
+ await act(async () => {
+ render(<ChatPlaygroundPage />);
+ });
+
+ await waitFor(() => {
+ expect(
+ screen.queryByTitle("Delete current agent")
+ ).not.toBeInTheDocument();
+ });
+ });
+
+ test("deletes agent and switches to another when confirmed", async () => {
+ global.confirm = jest.fn(() => true);
+
+ await act(async () => {
+ render(<ChatPlaygroundPage />);
+ });
+
+ await waitFor(() => {
+ expect(screen.getByTitle("Delete current agent")).toBeInTheDocument();
+ });
+
+ mockClient.agents.delete.mockResolvedValue(undefined);
+ mockClient.agents.list.mockResolvedValueOnce({ data: mockAgents });
+ mockClient.agents.list.mockResolvedValueOnce({
+ data: [mockAgents[1]],
+ });
+
+ const deleteButton = screen.getByTitle("Delete current agent");
+ await act(async () => {
+ deleteButton.click();
+ });
+
+ await waitFor(() => {
+ expect(mockClient.agents.delete).toHaveBeenCalledWith("agent_123");
+ expect(global.confirm).toHaveBeenCalledWith(
+ "Are you sure you want to delete this agent? This action cannot be undone and will delete all associated sessions."
+ );
+ });
+
+ (global.confirm as jest.Mock).mockRestore();
+ });
+
+ test("does not delete agent when cancelled", async () => {
+ global.confirm = jest.fn(() => false);
+
+ await act(async () => {
+ render(<ChatPlaygroundPage />);
+ });
+
+ await waitFor(() => {
+ expect(screen.getByTitle("Delete current agent")).toBeInTheDocument();
+ });
+
+ const deleteButton = screen.getByTitle("Delete current agent");
+ await act(async () => {
+ deleteButton.click();
+ });
+
+ await waitFor(() => {
+ expect(global.confirm).toHaveBeenCalled();
+ expect(mockClient.agents.delete).not.toHaveBeenCalled();
+ });
+
+ (global.confirm as jest.Mock).mockRestore();
+ });
+ });
+
+ describe("Error Handling", () => {
+ test("handles agent loading errors gracefully", async () => {
+ mockClient.agents.list.mockRejectedValue(
+ new Error("Failed to load agents")
+ );
+ const consoleSpy = jest
+ .spyOn(console, "error")
+ .mockImplementation(() => {});
+
+ await act(async () => {
+ render(<ChatPlaygroundPage />);
+ });
+
+ await waitFor(() => {
+ expect(consoleSpy).toHaveBeenCalledWith(
+ "Error fetching agents:",
+ expect.any(Error)
+ );
+ });
+
+ expect(screen.getByText("+ New Agent")).toBeInTheDocument();
+
+ consoleSpy.mockRestore();
+ });
+
+ test("handles model loading errors gracefully", async () => {
+ mockClient.models.list.mockRejectedValue(
+ new Error("Failed to load models")
+ );
+ const consoleSpy = jest
+ .spyOn(console, "error")
+ .mockImplementation(() => {});
+
+ await act(async () => {
+ render(<ChatPlaygroundPage />);
+ });
+
+ await waitFor(() => {
+ expect(consoleSpy).toHaveBeenCalledWith(
+ "Error fetching models:",
+ expect.any(Error)
+ );
+ });
+
+ consoleSpy.mockRestore();
+ });
+ });
+});
diff --git a/llama_stack/ui/app/chat-playground/page.tsx b/llama_stack/ui/app/chat-playground/page.tsx
index 30439554d..f924a0ba2 100644
--- a/llama_stack/ui/app/chat-playground/page.tsx
+++ b/llama_stack/ui/app/chat-playground/page.tsx
@@ -10,25 +10,18 @@ import {
SelectTrigger,
SelectValue,
} from "@/components/ui/select";
+import { Card } from "@/components/ui/card";
+import { Input } from "@/components/ui/input";
+import { Trash2 } from "lucide-react";
import { Chat } from "@/components/chat-playground/chat";
import { type Message } from "@/components/chat-playground/chat-message";
import { useAuthClient } from "@/hooks/use-auth-client";
-import type { CompletionCreateParams } from "llama-stack-client/resources/chat/completions";
import type { Model } from "llama-stack-client/resources/models";
+import type { TurnCreateParams } from "llama-stack-client/resources/agents/turn";
import {
- SessionManager,
SessionUtils,
+ type ChatSession,
} from "@/components/chat-playground/session-manager";
-
-interface ChatSession {
- id: string;
- name: string;
- messages: Message[];
- selectedModel: string;
- systemMessage: string;
- createdAt: number;
- updatedAt: number;
-}
export default function ChatPlaygroundPage() {
const [currentSession, setCurrentSession] = useState<ChatSession | null>(
null
@@ -40,26 +33,487 @@ export default function ChatPlaygroundPage() {
const [selectedModel, setSelectedModel] = useState("");
const [modelsLoading, setModelsLoading] = useState(true);
const [modelsError, setModelsError] = useState(null);
+ const [agents, setAgents] = useState<
+ Array<{
+ agent_id: string;
+ agent_config?: {
+ agent_name?: string;
+ name?: string;
+ instructions?: string;
+ };
+ [key: string]: unknown;
+ }>
+ >([]);
+ const [selectedAgentConfig, setSelectedAgentConfig] = useState<{
+ toolgroups?: Array<
+ string | { name: string; args: Record<string, unknown> }
+ >;
+ } | null>(null);
+ const [selectedAgentId, setSelectedAgentId] = useState("");
+ const [agentsLoading, setAgentsLoading] = useState(true);
+ const [showCreateAgent, setShowCreateAgent] = useState(false);
+ const [newAgentName, setNewAgentName] = useState("");
+ const [newAgentInstructions, setNewAgentInstructions] = useState(
+ "You are a helpful assistant."
+ );
+ const [selectedToolgroups, setSelectedToolgroups] = useState<string[]>([]);
+ const [availableToolgroups, setAvailableToolgroups] = useState<
+ Array<{
+ identifier: string;
+ provider_id: string;
+ type: string;
+ provider_resource_id?: string;
+ }>
+ >([]);
const client = useAuthClient();
const abortControllerRef = useRef<AbortController | null>(null);
const isModelsLoading = modelsLoading ?? true;
+ const loadAgentConfig = useCallback(
+ async (agentId: string) => {
+ try {
+ console.log("Loading agent config for:", agentId);
+
+ // try to load from cache first
+ const cachedConfig = SessionUtils.loadAgentConfig(agentId);
+ if (cachedConfig) {
+ console.log("✅ Loaded agent config from cache:", cachedConfig);
+ setSelectedAgentConfig({
+ toolgroups: cachedConfig.toolgroups,
+ });
+ return;
+ }
+
+ console.log("📡 Fetching agent config from API...");
+ const agentDetails = await client.agents.retrieve(agentId);
+ console.log("Agent details retrieved:", agentDetails);
+ console.log("Agent config:", agentDetails.agent_config);
+ console.log("Agent toolgroups:", agentDetails.agent_config?.toolgroups);
+
+ // cache the config
+ SessionUtils.saveAgentConfig(agentId, agentDetails.agent_config);
+
+ setSelectedAgentConfig({
+ toolgroups: agentDetails.agent_config?.toolgroups,
+ });
+ } catch (error) {
+ console.error("Error loading agent config:", error);
+ setSelectedAgentConfig(null);
+ }
+ },
+ [client]
+ );
+
+ const createDefaultSession = useCallback(
+ async (agentId: string) => {
+ try {
+ const response = await client.agents.session.create(agentId, {
+ session_name: "Default Session",
+ });
+
+ const defaultSession: ChatSession = {
+ id: response.session_id,
+ name: "Default Session",
+ messages: [],
+ selectedModel: selectedModel, // Use current selected model
+ systemMessage: "You are a helpful assistant.",
+ agentId,
+ createdAt: Date.now(),
+ updatedAt: Date.now(),
+ };
+
+ setCurrentSession(defaultSession);
+ console.log(
+ `💾 Saving default session ID for agent ${agentId}:`,
+ defaultSession.id
+ );
+ SessionUtils.saveCurrentSessionId(defaultSession.id, agentId);
+ // cache entire session data
+ SessionUtils.saveSessionData(agentId, defaultSession);
+ } catch (error) {
+ console.error("Error creating default session:", error);
+ }
+ },
+ [client, selectedModel]
+ );
+
+ const loadSessionMessages = useCallback(
+ async (agentId: string, sessionId: string): Promise<Message[]> => {
+ try {
+ const session = await client.agents.session.retrieve(
+ agentId,
+ sessionId
+ );
+
+ if (!session || !session.turns || !Array.isArray(session.turns)) {
+ return [];
+ }
+
+ const messages: Message[] = [];
+ for (const turn of session.turns) {
+ // add user messages
+ if (turn.input_messages && Array.isArray(turn.input_messages)) {
+ for (const input of turn.input_messages) {
+ if (input.role === "user" && input.content) {
+ messages.push({
+ id: `${turn.turn_id}-user-${messages.length}`,
+ role: "user",
+ content:
+ typeof input.content === "string"
+ ? input.content
+ : JSON.stringify(input.content),
+ createdAt: new Date(turn.started_at || Date.now()),
+ });
+ }
+ }
+ }
+
+ // add assistant message from output_message
+ if (turn.output_message && turn.output_message.content) {
+ messages.push({
+ id: `${turn.turn_id}-assistant-${messages.length}`,
+ role: "assistant",
+ content:
+ typeof turn.output_message.content === "string"
+ ? turn.output_message.content
+ : JSON.stringify(turn.output_message.content),
+ createdAt: new Date(
+ turn.completed_at || turn.started_at || Date.now()
+ ),
+ });
+ }
+ }
+
+ return messages;
+ } catch (error) {
+ console.error("Error loading session messages:", error);
+ return [];
+ }
+ },
+ [client]
+ );
+
+ const loadAgentSessions = useCallback(
+ async (agentId: string) => {
+ try {
+ console.log("Loading sessions for agent:", agentId);
+ const response = await client.agents.session.list(agentId);
+ console.log("Available sessions:", response.data);
+
+ if (
+ response.data &&
+ Array.isArray(response.data) &&
+ response.data.length > 0
+ ) {
+ // check for a previously saved session ID for this specific agent
+ const savedSessionId = SessionUtils.loadCurrentSessionId(agentId);
+ console.log(`Saved session ID for agent ${agentId}:`, savedSessionId);
+
+ // try to load cached session data first
+ if (savedSessionId) {
+ const cachedSession = SessionUtils.loadSessionData(
+ agentId,
+ savedSessionId
+ );
+ if (cachedSession) {
+ console.log("✅ Loaded session from cache:", cachedSession.id);
+ setCurrentSession(cachedSession);
+ SessionUtils.saveCurrentSessionId(cachedSession.id, agentId);
+ return;
+ }
+ console.log("📡 Cache miss, fetching session from API...");
+ }
+
+ let sessionToLoad = response.data[0] as {
+ session_id: string;
+ session_name?: string;
+ started_at?: string;
+ };
+ console.log(
+ "Default session to load (first in list):",
+ sessionToLoad.session_id
+ );
+
+ // try to find saved session id in available sessions
+ if (savedSessionId) {
+ const foundSession = response.data.find(
+ (s: { session_id: string }) => s.session_id === savedSessionId
+ );
+ console.log("Found saved session in list:", foundSession);
+ if (foundSession) {
+ sessionToLoad = foundSession as {
+ session_id: string;
+ session_name?: string;
+ started_at?: string;
+ };
+ console.log(
+ "✅ Restored previously selected session:",
+ savedSessionId
+ );
+ } else {
+ console.log(
+ "❌ Previously selected session not found, using latest session"
+ );
+ }
+ } else {
+ console.log("❌ No saved session ID found, using latest session");
+ }
+
+ const messages = await loadSessionMessages(
+ agentId,
+ sessionToLoad.session_id
+ );
+
+ const session: ChatSession = {
+ id: sessionToLoad.session_id,
+ name: sessionToLoad.session_name || "Session",
+ messages,
+ selectedModel: selectedModel || "", // Preserve current model or use empty
+ systemMessage: "You are a helpful assistant.",
+ agentId,
+ createdAt: sessionToLoad.started_at
+ ? new Date(sessionToLoad.started_at).getTime()
+ : Date.now(),
+ updatedAt: Date.now(),
+ };
+
+ setCurrentSession(session);
+ console.log(`💾 Saving session ID for agent ${agentId}:`, session.id);
+ SessionUtils.saveCurrentSessionId(session.id, agentId);
+ // cache session data
+ SessionUtils.saveSessionData(agentId, session);
+ } else {
+ // no sessions, create a new one
+ await createDefaultSession(agentId);
+ }
+ } catch (error) {
+ console.error("Error loading agent sessions:", error);
+ // fallback to creating a new session
+ await createDefaultSession(agentId);
+ }
+ },
+ [client, loadSessionMessages, createDefaultSession, selectedModel]
+ );
+
useEffect(() => {
- const saved = SessionUtils.loadCurrentSession();
- if (saved) {
- setCurrentSession(saved);
- } else {
- const def = SessionUtils.createDefaultSession();
- const defaultSession: ChatSession = {
- ...def,
- selectedModel: "",
- systemMessage: def.systemMessage || "You are a helpful assistant.",
- };
- setCurrentSession(defaultSession);
- SessionUtils.saveCurrentSession(defaultSession);
- }
- }, []);
+ const fetchAgents = async () => {
+ try {
+ setAgentsLoading(true);
+ const agentList = await client.agents.list();
+ setAgents(
+ (agentList.data as Array<{
+ agent_id: string;
+ agent_config?: {
+ agent_name?: string;
+ name?: string;
+ instructions?: string;
+ };
+ [key: string]: unknown;
+ }>) || []
+ );
+
+ if (agentList.data && agentList.data.length > 0) {
+ // check if there's a previously selected agent
+ const savedAgentId = SessionUtils.loadCurrentAgentId();
+
+ let agentToSelect = agentList.data[0] as {
+ agent_id: string;
+ agent_config?: {
+ agent_name?: string;
+ name?: string;
+ instructions?: string;
+ };
+ [key: string]: unknown;
+ };
+
+ // if we have a saved agent ID, find it in the available agents
+ if (savedAgentId) {
+ const foundAgent = agentList.data.find(
+ (a: { agent_id: string }) => a.agent_id === savedAgentId
+ );
+ if (foundAgent) {
+ agentToSelect = foundAgent as typeof agentToSelect;
+ } else {
+ console.log("Previously slelected agent not found:");
+ }
+ }
+ setSelectedAgentId(agentToSelect.agent_id);
+ SessionUtils.saveCurrentAgentId(agentToSelect.agent_id);
+ // load agent config immediately
+ await loadAgentConfig(agentToSelect.agent_id);
+ // Note: loadAgentSessions will be called after models are loaded
+ }
+ } catch (error) {
+ console.error("Error fetching agents:", error);
+ } finally {
+ setAgentsLoading(false);
+ }
+ };
+
+ fetchAgents();
+
+ // fetch available toolgroups
+ const fetchToolgroups = async () => {
+ try {
+ console.log("Fetching toolgroups...");
+ const toolgroups = await client.toolgroups.list();
+ console.log("Toolgroups response:", toolgroups);
+
+ // the client may return the toolgroups array directly or wrapped in a { data } envelope
+ const toolGroupsArray = Array.isArray(toolgroups)
+ ? toolgroups
+ : toolgroups &&
+ typeof toolgroups === "object" &&
+ "data" in toolgroups &&
+ Array.isArray((toolgroups as { data: unknown }).data)
+ ? (
+ toolgroups as {
+ data: Array<{
+ identifier: string;
+ provider_id: string;
+ type: string;
+ provider_resource_id?: string;
+ }>;
+ }
+ ).data
+ : [];
+
+ if (toolGroupsArray && Array.isArray(toolGroupsArray)) {
+ setAvailableToolgroups(toolGroupsArray);
+ console.log("Set toolgroups:", toolGroupsArray);
+ } else {
+ console.error("Invalid toolgroups data format:", toolgroups);
+ }
+ } catch (error) {
+ console.error("Error fetching toolgroups:", error);
+ if (error instanceof Error) {
+ console.error("Error details:", {
+ name: error.name,
+ message: error.message,
+ stack: error.stack,
+ });
+ }
+ }
+ };
+
+ fetchToolgroups();
+ }, [client, loadAgentSessions, loadAgentConfig]);
+
+ const createNewAgent = useCallback(
+ async (
+ name: string,
+ instructions: string,
+ model: string,
+ toolgroups: string[] = []
+ ) => {
+ try {
+ console.log("Creating agent with toolgroups:", toolgroups);
+ const agentConfig = {
+ model,
+ instructions,
+ name: name || undefined,
+ enable_session_persistence: true,
+ toolgroups: toolgroups.length > 0 ? toolgroups : undefined,
+ };
+ console.log("Agent config being sent:", agentConfig);
+
+ const response = await client.agents.create({
+ agent_config: agentConfig,
+ });
+
+ // refresh agents list
+ const agentList = await client.agents.list();
+ setAgents(
+ (agentList.data as Array<{
+ agent_id: string;
+ agent_config?: {
+ agent_name?: string;
+ name?: string;
+ instructions?: string;
+ };
+ [key: string]: unknown;
+ }>) || []
+ );
+
+ // set the new agent as selected
+ setSelectedAgentId(response.agent_id);
+ await loadAgentConfig(response.agent_id);
+ await loadAgentSessions(response.agent_id);
+
+ return response.agent_id;
+ } catch (error) {
+ console.error("Error creating agent:", error);
+ throw error;
+ }
+ },
+ [client, loadAgentSessions, loadAgentConfig]
+ );
+
+ const deleteAgent = useCallback(
+ async (agentId: string) => {
+ if (agents.length <= 1) {
+ return;
+ }
+
+ if (
+ confirm(
+ "Are you sure you want to delete this agent? This action cannot be undone and will delete all associated sessions."
+ )
+ ) {
+ try {
+ await client.agents.delete(agentId);
+
+ // clear cached data for agent
+ SessionUtils.clearAgentCache(agentId);
+
+ // Refresh agents list
+ const agentList = await client.agents.list();
+ setAgents(
+ (agentList.data as Array<{
+ agent_id: string;
+ agent_config?: {
+ agent_name?: string;
+ name?: string;
+ instructions?: string;
+ };
+ [key: string]: unknown;
+ }>) || []
+ );
+
+ // if we deleted the current agent, switch to another one
+ if (selectedAgentId === agentId) {
+ const remainingAgents = agentList.data?.filter(
+ (a: { agent_id: string }) => a.agent_id !== agentId
+ );
+ if (remainingAgents && remainingAgents.length > 0) {
+ const newAgent = remainingAgents[0] as {
+ agent_id: string;
+ agent_config?: {
+ agent_name?: string;
+ name?: string;
+ instructions?: string;
+ };
+ [key: string]: unknown;
+ };
+ setSelectedAgentId(newAgent.agent_id);
+ SessionUtils.saveCurrentAgentId(newAgent.agent_id);
+ await loadAgentConfig(newAgent.agent_id);
+ await loadAgentSessions(newAgent.agent_id);
+ } else {
+ // No agents left
+ setSelectedAgentId("");
+ setCurrentSession(null);
+ setSelectedAgentConfig(null);
+ }
+ }
+ } catch (error) {
+ console.error("Error deleting agent:", error);
+ }
+ }
+ },
+ [agents.length, client, selectedAgentId, loadAgentConfig, loadAgentSessions]
+ );
const handleModelChange = useCallback((newModel: string) => {
setSelectedModel(newModel);
@@ -76,16 +530,25 @@ export default function ChatPlaygroundPage() {
useEffect(() => {
if (currentSession) {
- if (abortControllerRef.current) {
- abortControllerRef.current.abort();
- abortControllerRef.current = null;
- setIsGenerating(false);
+ console.log(
+ `💾 Auto-saving session ID for agent ${currentSession.agentId}:`,
+ currentSession.id
+ );
+ SessionUtils.saveCurrentSessionId(
+ currentSession.id,
+ currentSession.agentId
+ );
+ // cache session data
+ SessionUtils.saveSessionData(currentSession.agentId, currentSession);
+ // only update selectedModel if the session has a valid model and it's different from current
+ if (
+ currentSession.selectedModel &&
+ currentSession.selectedModel !== selectedModel
+ ) {
+ setSelectedModel(currentSession.selectedModel);
}
-
- SessionUtils.saveCurrentSession(currentSession);
- setSelectedModel(currentSession.selectedModel);
}
- }, [currentSession]);
+ }, [currentSession, selectedModel]);
useEffect(() => {
const fetchModels = async () => {
@@ -109,37 +572,25 @@ export default function ChatPlaygroundPage() {
fetchModels();
}, [client, handleModelChange]);
- const extractTextContent = (content: unknown): string => {
- if (typeof content === "string") {
- return content;
- }
- if (Array.isArray(content)) {
- return content
- .filter(
- item =>
- item &&
- typeof item === "object" &&
- "type" in item &&
- item.type === "text"
- )
- .map(item =>
- item && typeof item === "object" && "text" in item
- ? String(item.text)
- : ""
- )
- .join("");
- }
+ // load agent sessions after both agents and models are ready
+ useEffect(() => {
if (
- content &&
- typeof content === "object" &&
- "type" in content &&
- content.type === "text" &&
- "text" in content
+ selectedAgentId &&
+ !agentsLoading &&
+ !modelsLoading &&
+ selectedModel &&
+ !currentSession
) {
- return String(content.text) || "";
+ loadAgentSessions(selectedAgentId);
}
- return "";
- };
+ }, [
+ selectedAgentId,
+ agentsLoading,
+ modelsLoading,
+ selectedModel,
+ currentSession,
+ loadAgentSessions,
+ ]);
const handleInputChange = (e: React.ChangeEvent<HTMLTextAreaElement>) => {
setInput(e.target.value);
@@ -156,21 +607,25 @@ export default function ChatPlaygroundPage() {
createdAt: new Date(),
};
- setCurrentSession(prev =>
- prev
- ? {
- ...prev,
- messages: [...prev.messages, userMessage],
- updatedAt: Date.now(),
- }
- : prev
- );
+ setCurrentSession(prev => {
+ if (!prev) return prev;
+ const updatedSession = {
+ ...prev,
+ messages: [...prev.messages, userMessage],
+ updatedAt: Date.now(),
+ };
+ // Update cache with new message
+ SessionUtils.saveSessionData(prev.agentId, updatedSession);
+ return updatedSession;
+ });
setInput("");
await handleSubmitWithContent(userMessage.content);
};
const handleSubmitWithContent = async (content: string) => {
+ if (!currentSession || !selectedAgentId) return;
+
setIsGenerating(true);
setError(null);
@@ -182,32 +637,20 @@ export default function ChatPlaygroundPage() {
abortControllerRef.current = abortController;
try {
- const messageParams: CompletionCreateParams["messages"] = [
- ...(currentSession?.systemMessage
- ? [{ role: "system" as const, content: currentSession.systemMessage }]
- : []),
- ...(currentSession?.messages || []).map(msg => {
- const msgContent =
- typeof msg.content === "string"
- ? msg.content
- : extractTextContent(msg.content);
- if (msg.role === "user") {
- return { role: "user" as const, content: msgContent };
- } else if (msg.role === "assistant") {
- return { role: "assistant" as const, content: msgContent };
- } else {
- return { role: "system" as const, content: msgContent };
- }
- }),
- { role: "user" as const, content },
- ];
+ const userMessage = {
+ role: "user" as const,
+ content,
+ };
- const response = await client.chat.completions.create(
- {
- model: selectedModel || "",
- messages: messageParams,
- stream: true,
- },
+ const turnParams: TurnCreateParams = {
+ messages: [userMessage],
+ stream: true,
+ };
+
+ const response = await client.agents.turn.create(
+ selectedAgentId,
+ currentSession.id,
+ turnParams,
{
signal: abortController.signal,
} as { signal: AbortSignal }
@@ -220,34 +663,86 @@ export default function ChatPlaygroundPage() {
createdAt: new Date(),
};
- setCurrentSession(prev =>
- prev
- ? {
- ...prev,
- messages: [...prev.messages, assistantMessage],
- updatedAt: Date.now(),
- }
- : null
- );
+ const extractDeltaText = (chunk: unknown): string | null => {
+ // the stream may emit chunks in several formats; probe each known shape in turn
+ if (chunk?.delta?.text && typeof chunk.delta.text === "string") {
+ return chunk.delta.text;
+ }
+
+ if (
+ chunk?.event?.delta?.text &&
+ typeof chunk.event.delta.text === "string"
+ ) {
+ return chunk.event.delta.text;
+ }
+
+ if (
+ chunk?.choices?.[0]?.delta?.content &&
+ typeof chunk.choices[0].delta.content === "string"
+ ) {
+ return chunk.choices[0].delta.content;
+ }
+
+ if (typeof chunk === "string") {
+ return chunk;
+ }
+
+ if (
+ chunk?.event?.payload?.delta?.text &&
+ typeof chunk.event.payload.delta.text === "string"
+ ) {
+ return chunk.event.payload.delta.text;
+ }
+
+ if (process.env.NODE_ENV !== "production") {
+ console.debug("Unrecognized chunk format:", chunk);
+ }
+
+ return null;
+ };
+ setCurrentSession(prev => {
+ if (!prev) return null;
+ const updatedSession = {
+ ...prev,
+ messages: [...prev.messages, assistantMessage],
+ updatedAt: Date.now(),
+ };
+ // update cache with assistant message
+ SessionUtils.saveSessionData(prev.agentId, updatedSession);
+ return updatedSession;
+ });
+
let fullContent = "";
for await (const chunk of response) {
- if (chunk.choices && chunk.choices[0]?.delta?.content) {
- const deltaContent = chunk.choices[0].delta.content;
- fullContent += deltaContent;
+ const deltaText = extractDeltaText(chunk);
+
+ if (deltaText) {
+ fullContent += deltaText;
flushSync(() => {
setCurrentSession(prev => {
if (!prev) return null;
const newMessages = [...prev.messages];
const last = newMessages[newMessages.length - 1];
- if (last.role === "assistant") last.content = fullContent;
- return { ...prev, messages: newMessages, updatedAt: Date.now() };
+ if (last.role === "assistant") {
+ last.content = fullContent;
+ }
+ const updatedSession = {
+ ...prev,
+ messages: newMessages,
+ updatedAt: Date.now(),
+ };
+ // update cache with streaming content (throttled)
+ if (fullContent.length % 100 === 0) {
+ // write to the cache only when the length lands on a 100-character multiple, to keep writes infrequent
+ SessionUtils.saveSessionData(prev.agentId, updatedSession);
+ }
+ return updatedSession;
});
});
}
}
} catch (err) {
- // don't show error if request was aborted
if (err instanceof Error && err.name === "AbortError") {
console.log("Request aborted");
return;
@@ -267,6 +762,13 @@ export default function ChatPlaygroundPage() {
} finally {
setIsGenerating(false);
abortControllerRef.current = null;
+ // cache final session state after streaming completes
+ setCurrentSession(prev => {
+ if (prev) {
+ SessionUtils.saveSessionData(prev.agentId, prev);
+ }
+ return prev;
+ });
}
};
const suggestions = [
@@ -295,54 +797,99 @@ export default function ChatPlaygroundPage() {
};
const clearChat = () => {
+ if (abortControllerRef.current) {
+ abortControllerRef.current.abort();
+ abortControllerRef.current = null;
+ setIsGenerating(false);
+ }
+
setCurrentSession(prev =>
prev ? { ...prev, messages: [], updatedAt: Date.now() } : prev
);
setError(null);
};
- const handleSessionChange = (session: ChatSession) => {
- setCurrentSession(session);
- setError(null);
- };
-
- const handleNewSession = () => {
- const defaultModel =
- currentSession?.selectedModel ||
- (models.length > 0 ? models[0].identifier : "");
-
- const newSession: ChatSession = {
- ...SessionUtils.createDefaultSession(),
- selectedModel: defaultModel,
- systemMessage:
- currentSession?.systemMessage || "You are a helpful assistant.",
- messages: [],
- updatedAt: Date.now(),
- createdAt: Date.now(),
- };
- setCurrentSession(newSession);
- SessionUtils.saveCurrentSession(newSession);
- };
-
return (
{/* Header */}
-
Chat Playground
+
Agent Session
-
+ {!agentsLoading && agents.length > 0 && (
+
+ Agent Session:
+ {
+ console.log("🤖 User selected agent:", agentId);
+ setSelectedAgentId(agentId);
+ SessionUtils.saveCurrentAgentId(agentId);
+ loadAgentConfig(agentId);
+ loadAgentSessions(agentId);
+ }}
+ disabled={agentsLoading}
+ >
+
+
+
+
+ {agents.map(agent => (
+
+ {(() => {
+ if (
+ agent.agent_config &&
+ "name" in agent.agent_config &&
+ typeof agent.agent_config.name === "string"
+ ) {
+ return agent.agent_config.name;
+ }
+ if (
+ agent.agent_config &&
+ "agent_name" in agent.agent_config &&
+ typeof agent.agent_config.agent_name === "string"
+ ) {
+ return agent.agent_config.agent_name;
+ }
+ return `Agent ${agent.agent_id.slice(0, 8)}...`;
+ })()}
+
+ ))}
+
+
+ {selectedAgentId && agents.length > 1 && (
+ deleteAgent(selectedAgentId)}
+ variant="outline"
+ size="sm"
+ className="text-destructive hover:text-destructive hover:bg-destructive/10"
+ title="Delete current agent"
+ >
+
+
+ )}
+
+ )}
setShowCreateAgent(true)}
variant="outline"
- onClick={clearChat}
- disabled={isGenerating}
+ size="sm"
>
- Clear Chat
+ + New Agent
+ {!agentsLoading && agents.length > 0 && (
+
+ Clear Chat
+
+ )}
@@ -392,25 +939,97 @@ export default function ChatPlaygroundPage() {
- System Message
+ Agent Instructions
-
+
+
+
+ {/* Agent Tools */}
+
+
+ Agent Tools
+
+
+
+
+ Configured Tools (Coming Soon)
+
+
+ {selectedAgentConfig?.toolgroups &&
+ selectedAgentConfig.toolgroups.length > 0 ? (
+ selectedAgentConfig.toolgroups.map(
+ (
+ toolgroup:
+ | string
+ | { name: string; args: Record<string, unknown> },
+ index: number
+ ) => {
+ const toolName =
+ typeof toolgroup === "string"
+ ? toolgroup
+ : toolgroup.name;
+ const toolArgs =
+ typeof toolgroup === "object" ? toolgroup.args : null;
+
+ return (
+
+
+
+ {toolName}
+
+
+ {toolName.includes("rag")
+ ? "🔍 RAG"
+ : toolName.includes("search")
+ ? "🌐 Search"
+ : "🔧 Tool"}
+
+
+ {toolArgs && Object.keys(toolArgs).length > 0 && (
+
+ Args: {" "}
+ {Object.entries(toolArgs)
+ .map(
+ ([key, value]) =>
+ `${key}: ${JSON.stringify(value)}`
+ )
+ .join(", ")}
+
+ )}
+
+ );
+ }
)
- }
- placeholder="You are a helpful assistant."
- disabled={isGenerating}
- className="w-full h-24 px-3 py-2 text-sm border border-input rounded-md resize-none focus:outline-none focus:ring-2 focus:ring-ring focus:ring-offset-2"
- />
+ ) : (
+
+
+ No tools configured
+
+
+ This agent only has text generation capabilities
+
+
+ )}
+
+
+ Tools are configured when creating an agent and provide
+ additional capabilities like web search, math calculations, or
+ RAG document retrieval.
+
@@ -441,6 +1060,181 @@ export default function ChatPlaygroundPage() {
/>
+
+ {/* Create Agent Modal */}
+ {showCreateAgent && (
+
+
+ Create New Agent
+
+
+
+
+ Agent Name (optional)
+
+ setNewAgentName(e.target.value)}
+ placeholder="My Custom Agent"
+ />
+
+
+
+ Model
+
+
+
+
+
+ {models.map(model => (
+
+ {model.identifier}
+
+ ))}
+
+
+
+
+
+
+ System Instructions
+
+
+
+
+
+ Tools (optional)
+
+
+ NOTE: Tools are not yet implemented
+
+
+ Available toolgroups: {availableToolgroups.length} found
+
+
+ {availableToolgroups.length === 0 ? (
+
+ Loading toolgroups...
+
+ ) : (
+ availableToolgroups.map(toolgroup => (
+
+ {
+ console.log(
+ "Tool selection changed:",
+ toolgroup.identifier,
+ e.target.checked
+ );
+ if (e.target.checked) {
+ setSelectedToolgroups(prev => {
+ const newSelection = [
+ ...prev,
+ toolgroup.identifier,
+ ];
+ console.log(
+ "New selected toolgroups:",
+ newSelection
+ );
+ return newSelection;
+ });
+ } else {
+ setSelectedToolgroups(prev => {
+ const newSelection = prev.filter(
+ id => id !== toolgroup.identifier
+ );
+ console.log(
+ "New selected toolgroups:",
+ newSelection
+ );
+ return newSelection;
+ });
+ }
+ }}
+ className="rounded border-input"
+ />
+
+
+ {toolgroup.identifier}
+
+
+ ({toolgroup.provider_id})
+
+
+
+ ))
+ )}
+
+ {selectedToolgroups.length === 0 && (
+
+ No tools selected - agent will only have text generation
+ capabilities.
+
+ )}
+
+ Note: Selected tools will be configured for
+ the agent. Some tools like RAG may require additional vector
+ DB configuration, and web search tools need API keys. Basic
+ text generation agents work without tools.
+
+
+
+
+
+ {
+ try {
+ await createNewAgent(
+ newAgentName,
+ newAgentInstructions,
+ selectedModel,
+ selectedToolgroups
+ );
+ setShowCreateAgent(false);
+ setNewAgentName("");
+ setNewAgentInstructions("You are a helpful assistant.");
+ setSelectedToolgroups([]);
+ } catch (error) {
+ console.error("Failed to create agent:", error);
+ }
+ }}
+ className="flex-1"
+ disabled={!selectedModel || !newAgentInstructions.trim()}
+ >
+ Create Agent
+
+ {
+ setShowCreateAgent(false);
+ setNewAgentName("");
+ setNewAgentInstructions("You are a helpful assistant.");
+ setSelectedToolgroups([]);
+ }}
+ className="flex-1"
+ >
+ Cancel
+
+
+
+
+ )}
);
}
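# The fallback chain in extractDeltaText above can be written as a union over
# the chunk shapes it already handles. A sketch, assuming those shapes are
# exhaustive (they may not be for every provider):

// delta-text.ts (hypothetical) - typed restatement of the fallbacks above
type StreamChunk =
  | string
  | { delta?: { text?: string } }
  | {
      event?: {
        delta?: { text?: string };
        payload?: { delta?: { text?: string } };
      };
    }
  | { choices?: Array<{ delta?: { content?: string } }> };

function deltaTextOf(chunk: StreamChunk): string | null {
  if (typeof chunk === "string") return chunk;
  if ("delta" in chunk) {
    const t = chunk.delta?.text;
    if (typeof t === "string") return t;
  }
  if ("event" in chunk) {
    const t = chunk.event?.delta?.text ?? chunk.event?.payload?.delta?.text;
    if (typeof t === "string") return t;
  }
  if ("choices" in chunk) {
    const t = chunk.choices?.[0]?.delta?.content;
    if (typeof t === "string") return t;
  }
  return null;
}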
diff --git a/llama_stack/ui/app/favicon.ico b/llama_stack/ui/app/favicon.ico
deleted file mode 100644
index 718d6fea4..000000000
Binary files a/llama_stack/ui/app/favicon.ico and /dev/null differ
diff --git a/llama_stack/ui/app/globals.css b/llama_stack/ui/app/globals.css
index dc98be74c..000dad718 100644
--- a/llama_stack/ui/app/globals.css
+++ b/llama_stack/ui/app/globals.css
@@ -120,3 +120,44 @@
@apply bg-background text-foreground;
}
}
+
+@layer utilities {
+ .animate-typing-dot-1 {
+ animation: typing-dot-bounce-1 0.8s cubic-bezier(0.4, 0, 0.6, 1) infinite;
+ }
+
+ .animate-typing-dot-2 {
+ animation: typing-dot-bounce-2 0.8s cubic-bezier(0.4, 0, 0.6, 1) infinite;
+ }
+
+ .animate-typing-dot-3 {
+ animation: typing-dot-bounce-3 0.8s cubic-bezier(0.4, 0, 0.6, 1) infinite;
+ }
+
+ @keyframes typing-dot-bounce-1 {
+ 0%, 15%, 85%, 100% {
+ transform: translateY(0);
+ }
+ 7.5% {
+ transform: translateY(-6px);
+ }
+ }
+
+ @keyframes typing-dot-bounce-2 {
+ 0%, 15%, 35%, 85%, 100% {
+ transform: translateY(0);
+ }
+ 25% {
+ transform: translateY(-6px);
+ }
+ }
+
+ @keyframes typing-dot-bounce-3 {
+ 0%, 35%, 55%, 85%, 100% {
+ transform: translateY(0);
+ }
+ 45% {
+ transform: translateY(-6px);
+ }
+ }
+}
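# A hypothetical consumer of the new animation utilities: only the
# animate-typing-dot-* classes come from globals.css above; the component
# name and the remaining Tailwind classes are illustrative.

export function TypingIndicator() {
  return (
    <div className="flex items-center gap-1" aria-label="Assistant is typing">
      <span className="h-1.5 w-1.5 rounded-full bg-current animate-typing-dot-1" />
      <span className="h-1.5 w-1.5 rounded-full bg-current animate-typing-dot-2" />
      <span className="h-1.5 w-1.5 rounded-full bg-current animate-typing-dot-3" />
    </div>
  );
}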
diff --git a/llama_stack/ui/app/layout.tsx b/llama_stack/ui/app/layout.tsx
index 19fb18c36..8b91341e4 100644
--- a/llama_stack/ui/app/layout.tsx
+++ b/llama_stack/ui/app/layout.tsx
@@ -18,6 +18,9 @@ const geistMono = Geist_Mono({
export const metadata: Metadata = {
title: "Llama Stack",
description: "Llama Stack UI",
+ icons: {
+ icon: "/favicon.ico",
+ },
};
import { SidebarProvider, SidebarTrigger } from "@/components/ui/sidebar";
diff --git a/llama_stack/ui/components/chat-playground/session-manager.test.tsx b/llama_stack/ui/components/chat-playground/session-manager.test.tsx
index 717c26410..bcbeed6a7 100644
--- a/llama_stack/ui/components/chat-playground/session-manager.test.tsx
+++ b/llama_stack/ui/components/chat-playground/session-manager.test.tsx
@@ -1,11 +1,5 @@
import React from "react";
-import {
- render,
- screen,
- fireEvent,
- waitFor,
- act,
-} from "@testing-library/react";
+import { render, screen, waitFor, act } from "@testing-library/react";
import "@testing-library/jest-dom";
import { SessionManager, SessionUtils } from "./session-manager";
import type { Message } from "@/components/chat-playground/chat-message";
@@ -15,8 +9,8 @@ interface ChatSession {
name: string;
messages: Message[];
selectedModel: string;
- selectedVectorDb: string;
systemMessage: string;
+ agentId: string;
createdAt: number;
updatedAt: number;
}
@@ -24,6 +18,39 @@ interface ChatSession {
const mockOnSessionChange = jest.fn();
const mockOnNewSession = jest.fn();
+// Mock the auth client
+const mockClient = {
+ agents: {
+ session: {
+ list: jest.fn(),
+ create: jest.fn(),
+ delete: jest.fn(),
+ retrieve: jest.fn(),
+ },
+ },
+};
+
+// Mock the useAuthClient hook
+jest.mock("@/hooks/use-auth-client", () => ({
+ useAuthClient: jest.fn(() => mockClient),
+}));
+
+// Mock additional SessionUtils methods that are now being used
+jest.mock("./session-manager", () => {
+ const actual = jest.requireActual("./session-manager");
+ return {
+ ...actual,
+ SessionUtils: {
+ ...actual.SessionUtils,
+ saveSessionData: jest.fn(),
+ loadSessionData: jest.fn(),
+ saveAgentConfig: jest.fn(),
+ loadAgentConfig: jest.fn(),
+ clearAgentCache: jest.fn(),
+ },
+ };
+});
+
const localStorageMock = {
getItem: jest.fn(),
setItem: jest.fn(),
@@ -54,27 +81,28 @@ describe("SessionManager", () => {
id: "msg_1",
role: "user",
content: "Hello",
- timestamp: Date.now(),
+ createdAt: new Date(),
},
],
selectedModel: "test-model",
- selectedVectorDb: "test-vector-db",
systemMessage: "You are a helpful assistant.",
+ agentId: "agent_123",
createdAt: 1710000000,
updatedAt: 1710001000,
};
- const mockSessions: ChatSession[] = [
- mockSession,
+ const mockAgentSessions = [
{
- id: "session_456",
- name: "Another Session",
- messages: [],
- selectedModel: "another-model",
- selectedVectorDb: "another-vector-db",
- systemMessage: "You are another assistant.",
- createdAt: 1710002000,
- updatedAt: 1710003000,
+ session_id: "session_123",
+ session_name: "Test Session",
+ started_at: "2024-01-01T00:00:00Z",
+ turns: [],
+ },
+ {
+ session_id: "session_456",
+ session_name: "Another Session",
+ started_at: "2024-01-01T01:00:00Z",
+ turns: [],
},
];
@@ -82,14 +110,47 @@ describe("SessionManager", () => {
jest.clearAllMocks();
localStorageMock.getItem.mockReturnValue(null);
localStorageMock.setItem.mockImplementation(() => {});
+ mockClient.agents.session.list.mockResolvedValue({
+ data: mockAgentSessions,
+ });
+ mockClient.agents.session.create.mockResolvedValue({
+ session_id: "new_session_123",
+ });
+ mockClient.agents.session.delete.mockResolvedValue(undefined);
+ mockClient.agents.session.retrieve.mockResolvedValue({
+ session_id: "test-session",
+ session_name: "Test Session",
+ started_at: new Date().toISOString(),
+ turns: [],
+ });
uuidCounter = 0; // Reset UUID counter for consistent test behavior
});
describe("Component Rendering", () => {
- test("renders session selector with placeholder when no session selected", async () => {
+ test("does not render when no agent is selected", async () => {
+ const { container } = await act(async () => {
+ return render(
+
+ );
+ });
+
+ expect(container.firstChild).toBeNull();
+ });
+
+ test("renders loading state initially", async () => {
+ mockClient.agents.session.list.mockImplementation(
+ () => new Promise(() => {}) // Never resolves to simulate loading
+ );
+
await act(async () => {
render(
{
});
expect(screen.getByText("Select Session")).toBeInTheDocument();
- expect(screen.getByRole("button", { name: /New/ })).toBeInTheDocument();
+ // When loading, the "+ New" button should be disabled
+ expect(screen.getByText("+ New")).toBeDisabled();
});
- test("renders current session name when session is selected", async () => {
- localStorageMock.getItem.mockImplementation(key => {
- if (key === "chat-playground-sessions") {
- return JSON.stringify(mockSessions);
- }
- return null;
- });
-
- await act(async () => {
- render(
-
- );
- });
-
- await waitFor(() => {
- expect(screen.getByText("Test Session")).toBeInTheDocument();
- });
- });
-
- test("shows session info when multiple sessions exist", async () => {
- localStorageMock.getItem.mockImplementation(key => {
- if (key === "chat-playground-sessions") {
- return JSON.stringify(mockSessions);
- }
- return null;
- });
-
- await act(async () => {
- render(
-
- );
- });
-
- await waitFor(() => {
- expect(screen.getByText(/2 sessions/)).toBeInTheDocument();
- expect(screen.getByText(/Current: Test Session/)).toBeInTheDocument();
- expect(screen.getByText(/1 messages/)).toBeInTheDocument();
- });
- });
- });
-
- describe("Session Creation", () => {
- test("shows create form when New button is clicked", async () => {
- await act(async () => {
- render(
-
- );
- });
-
- const newButton = screen.getByRole("button", { name: /New/ });
- fireEvent.click(newButton);
-
- expect(screen.getByText("Create New Session")).toBeInTheDocument();
- expect(
- screen.getByPlaceholderText("Session name (optional)")
- ).toBeInTheDocument();
- expect(
- screen.getByRole("button", { name: "Create" })
- ).toBeInTheDocument();
- expect(
- screen.getByRole("button", { name: "Cancel" })
- ).toBeInTheDocument();
- });
-
- test("creates session with custom name", async () => {
- await act(async () => {
- render(
-
- );
- });
-
- const newButton = screen.getByRole("button", { name: /New/ });
- fireEvent.click(newButton);
-
- const nameInput = screen.getByPlaceholderText("Session name (optional)");
- fireEvent.change(nameInput, { target: { value: "Custom Session" } });
-
- const createButton = screen.getByRole("button", { name: "Create" });
- fireEvent.click(createButton);
-
- expect(localStorageMock.setItem).toHaveBeenCalledWith(
- "chat-playground-sessions",
- expect.stringContaining("Custom Session")
- );
- expect(mockOnSessionChange).toHaveBeenCalled();
- });
-
- test("creates session with default name when no name provided", async () => {
- localStorageMock.getItem.mockImplementation(key => {
- if (key === "chat-playground-sessions") {
- return JSON.stringify(mockSessions);
- }
- return null;
- });
-
- await act(async () => {
- render(
-
- );
- });
-
- const newButton = screen.getByRole("button", { name: /New/ });
- fireEvent.click(newButton);
-
- const createButton = screen.getByRole("button", { name: "Create" });
- fireEvent.click(createButton);
-
- expect(localStorageMock.setItem).toHaveBeenCalledWith(
- "chat-playground-sessions",
- expect.stringContaining("Session 3")
- );
- });
-
- test("cancels session creation", async () => {
+ test("renders session selector when agent sessions are loaded", async () => {
await act(async () => {
render(
{
);
});
- const newButton = screen.getByRole("button", { name: /New/ });
- fireEvent.click(newButton);
-
- const nameInput = screen.getByPlaceholderText("Session name (optional)");
- fireEvent.change(nameInput, { target: { value: "Test Input" } });
-
- localStorageMock.setItem.mockClear();
-
- const cancelButton = screen.getByRole("button", { name: "Cancel" });
- fireEvent.click(cancelButton);
-
- expect(screen.queryByText("Create New Session")).not.toBeInTheDocument();
- expect(localStorageMock.setItem).not.toHaveBeenCalled();
+ await waitFor(() => {
+ expect(screen.getByText("Select Session")).toBeInTheDocument();
+ });
});
- test("creates session on Enter key press", async () => {
- await act(async () => {
- render(
-
- );
- });
-
- const newButton = screen.getByRole("button", { name: /New/ });
- fireEvent.click(newButton);
-
- const nameInput = screen.getByPlaceholderText("Session name (optional)");
- fireEvent.change(nameInput, { target: { value: "Enter Session" } });
- fireEvent.keyDown(nameInput, { key: "Enter" });
-
- expect(localStorageMock.setItem).toHaveBeenCalledWith(
- "chat-playground-sessions",
- expect.stringContaining("Enter Session")
- );
- });
-
- test("cancels session creation on Escape key press", async () => {
- await act(async () => {
- render(
-
- );
- });
-
- const newButton = screen.getByRole("button", { name: /New/ });
- fireEvent.click(newButton);
-
- const nameInput = screen.getByPlaceholderText("Session name (optional)");
- fireEvent.keyDown(nameInput, { key: "Escape" });
-
- expect(screen.queryByText("Create New Session")).not.toBeInTheDocument();
- });
- });
-
- describe("Session Switching", () => {
- test("switches to selected session", async () => {
- localStorageMock.getItem.mockImplementation(key => {
- if (key === "chat-playground-sessions") {
- return JSON.stringify(mockSessions);
- }
- return null;
- });
-
+ test("renders current session name when session is selected", async () => {
await act(async () => {
render(
{
await waitFor(() => {
expect(screen.getByText("Test Session")).toBeInTheDocument();
});
-
- const selectTrigger = screen.getByRole("combobox");
- fireEvent.click(selectTrigger);
-
- await waitFor(() => {
- const anotherSessionOption = screen.getByText("Another Session");
- fireEvent.click(anotherSessionOption);
- });
-
- expect(localStorageMock.setItem).toHaveBeenCalledWith(
- "chat-playground-current-session",
- "session_456"
- );
- expect(mockOnSessionChange).toHaveBeenCalledWith(
- expect.objectContaining({
- id: "session_456",
- name: "Another Session",
- })
- );
});
});
- describe("LocalStorage Integration", () => {
- test("loads sessions from localStorage on mount", async () => {
- localStorageMock.getItem.mockImplementation(key => {
- if (key === "chat-playground-sessions") {
- return JSON.stringify(mockSessions);
- }
- return null;
- });
-
+ describe("Agent API Integration", () => {
+ test("loads sessions from agent API on mount", async () => {
await act(async () => {
render(
{
});
await waitFor(() => {
- expect(localStorageMock.getItem).toHaveBeenCalledWith(
- "chat-playground-sessions"
+ expect(mockClient.agents.session.list).toHaveBeenCalledWith(
+ "agent_123"
);
});
});
- test("handles corrupted localStorage data gracefully", async () => {
- localStorageMock.getItem.mockReturnValue("invalid json");
- const consoleSpy = jest.spyOn(console, "error").mockImplementation();
+ test("handles API errors gracefully", async () => {
+ mockClient.agents.session.list.mockRejectedValue(new Error("API Error"));
+ const consoleSpy = jest
+ .spyOn(console, "error")
+ .mockImplementation(() => {});
await act(async () => {
render(
{
);
});
- expect(consoleSpy).toHaveBeenCalledWith(
- "Error parsing JSON:",
- expect.any(Error)
- );
+ await waitFor(() => {
+ expect(consoleSpy).toHaveBeenCalledWith(
+ "Error loading agent sessions:",
+ expect.any(Error)
+ );
+ });
consoleSpy.mockRestore();
});
-
- test("updates localStorage when current session changes", async () => {
- const updatedSession = {
- ...mockSession,
- messages: [
- ...mockSession.messages,
- {
- id: "msg_2",
- role: "assistant" as const,
- content: "Hello back!",
- timestamp: Date.now(),
- },
- ],
- updatedAt: Date.now(),
- };
-
- localStorageMock.getItem.mockImplementation(key => {
- if (key === "chat-playground-sessions") {
- return JSON.stringify([mockSession]);
- }
- return null;
- });
-
- const { rerender } = render(
- <SessionManager currentSession={mockSession} onSessionChange={mockOnSessionChange} onNewSession={mockOnNewSession} />
- );
-
- await act(async () => {
- rerender(
- <SessionManager currentSession={updatedSession} onSessionChange={mockOnSessionChange} onNewSession={mockOnNewSession} />
- );
- });
-
- await waitFor(() => {
- expect(localStorageMock.setItem).toHaveBeenCalledWith(
- "chat-playground-sessions",
- expect.stringContaining(updatedSession.id)
- );
- });
- });
- });
-
- describe("Session Deletion", () => {
- test("shows delete button only when multiple sessions exist", async () => {
- localStorageMock.getItem.mockImplementation(key => {
- if (key === "chat-playground-sessions") {
- return JSON.stringify([mockSession]);
- }
- return null;
- });
-
- await act(async () => {
- render(
- <SessionManager currentSession={mockSession} onSessionChange={mockOnSessionChange} onNewSession={mockOnNewSession} />
- );
- });
-
- expect(
- screen.queryByTitle("Delete current session")
- ).not.toBeInTheDocument();
-
- localStorageMock.getItem.mockImplementation(key => {
- if (key === "chat-playground-sessions") {
- return JSON.stringify(mockSessions);
- }
- return null;
- });
-
- const { rerender } = render(
- <SessionManager currentSession={mockSession} onSessionChange={mockOnSessionChange} onNewSession={mockOnNewSession} />
- );
-
- await act(async () => {
- rerender(
- <SessionManager currentSession={mockSession} onSessionChange={mockOnSessionChange} onNewSession={mockOnNewSession} />
- );
- });
-
- await waitFor(() => {
- expect(screen.getByTitle("Delete current session")).toBeInTheDocument();
- });
- });
-
- test("deletes current session after confirmation", async () => {
- window.confirm = jest.fn().mockReturnValue(true);
-
- localStorageMock.getItem.mockImplementation(key => {
- if (key === "chat-playground-sessions") {
- return JSON.stringify(mockSessions);
- }
- return null;
- });
-
- await act(async () => {
- render(
- <SessionManager currentSession={mockSession} onSessionChange={mockOnSessionChange} onNewSession={mockOnNewSession} />
- );
- });
-
- await waitFor(() => {
- expect(screen.getByTitle("Delete current session")).toBeInTheDocument();
- });
-
- const deleteButton = screen.getByTitle("Delete current session");
- fireEvent.click(deleteButton);
-
- expect(window.confirm).toHaveBeenCalledWith(
- "Are you sure you want to delete this session? This action cannot be undone."
- );
- expect(mockOnSessionChange).toHaveBeenCalled();
- });
-
- test("cancels deletion when user rejects confirmation", async () => {
- window.confirm = jest.fn().mockReturnValue(false);
-
- localStorageMock.getItem.mockImplementation(key => {
- if (key === "chat-playground-sessions") {
- return JSON.stringify(mockSessions);
- }
- return null;
- });
-
- await act(async () => {
- render(
- <SessionManager currentSession={mockSession} onSessionChange={mockOnSessionChange} onNewSession={mockOnNewSession} />
- );
- });
-
- await waitFor(() => {
- expect(screen.getByTitle("Delete current session")).toBeInTheDocument();
- });
-
- const deleteButton = screen.getByTitle("Delete current session");
- fireEvent.click(deleteButton);
-
- expect(window.confirm).toHaveBeenCalled();
- expect(mockOnSessionChange).not.toHaveBeenCalled();
- });
-
- test("prevents deletion of the last remaining session", async () => {
- const singleSession = [mockSession];
-
- localStorageMock.getItem.mockImplementation(key => {
- if (key === "chat-playground-sessions") {
- return JSON.stringify(singleSession);
- }
- return null;
- });
-
- await act(async () => {
- render(
- <SessionManager currentSession={mockSession} onSessionChange={mockOnSessionChange} onNewSession={mockOnNewSession} />
- );
- });
-
- expect(
- screen.queryByTitle("Delete current session")
- ).not.toBeInTheDocument();
- });
});
describe("Error Handling", () => {
- test("component renders without crashing when localStorage is unavailable", async () => {
+ test("component renders without crashing when API is unavailable", async () => {
+ mockClient.agents.session.list.mockRejectedValue(
+ new Error("Network Error")
+ );
+ const consoleSpy = jest
+ .spyOn(console, "error")
+ .mockImplementation(() => {});
+
await act(async () => {
render(
<SessionManager currentSession={mockSession} onSessionChange={mockOnSessionChange} onNewSession={mockOnNewSession} selectedAgentId="agent_123" />
);
});
- expect(screen.getByRole("button", { name: /New/ })).toBeInTheDocument();
- expect(screen.getByText("Test Session")).toBeInTheDocument();
+ // Should still render the session manager with the select trigger
+ expect(screen.getByRole("combobox")).toBeInTheDocument();
+ expect(screen.getByText("+ New")).toBeInTheDocument();
+ consoleSpy.mockRestore();
});
});
});
describe("SessionUtils", () => {
- const mockSession: ChatSession = {
- id: "utils_session_123",
- name: "Utils Test Session",
- messages: [],
- selectedModel: "utils-model",
- selectedVectorDb: "utils-vector-db",
- systemMessage: "You are a utils assistant.",
- createdAt: 1710000000,
- updatedAt: 1710001000,
- };
-
- const mockSessions = [mockSession];
-
beforeEach(() => {
jest.clearAllMocks();
localStorageMock.getItem.mockReturnValue(null);
localStorageMock.setItem.mockImplementation(() => {});
});
- describe("loadCurrentSession", () => {
- test("returns null when no current session ID stored", () => {
- const result = SessionUtils.loadCurrentSession();
- expect(result).toBeNull();
- });
+ describe("saveCurrentSessionId", () => {
+ test("saves session ID to localStorage", () => {
+ SessionUtils.saveCurrentSessionId("test-session-id");
- test("returns null when no sessions stored", () => {
- localStorageMock.getItem.mockImplementation(key => {
- if (key === "chat-playground-current-session") {
- return "session_123";
- }
- return null;
- });
-
- const result = SessionUtils.loadCurrentSession();
- expect(result).toBeNull();
- });
-
- test("returns current session when found", () => {
- localStorageMock.getItem.mockImplementation(key => {
- if (key === "chat-playground-current-session") {
- return "utils_session_123";
- }
- if (key === "chat-playground-sessions") {
- return JSON.stringify(mockSessions);
- }
- return null;
- });
-
- const result = SessionUtils.loadCurrentSession();
- expect(result).toEqual(mockSession);
- });
-
- test("returns null when current session ID not found in sessions", () => {
- localStorageMock.getItem.mockImplementation(key => {
- if (key === "chat-playground-current-session") {
- return "nonexistent_session";
- }
- if (key === "chat-playground-sessions") {
- return JSON.stringify(mockSessions);
- }
- return null;
- });
-
- const result = SessionUtils.loadCurrentSession();
- expect(result).toBeNull();
- });
-
- test("handles corrupted sessions data gracefully", () => {
- localStorageMock.getItem.mockImplementation(key => {
- if (key === "chat-playground-current-session") {
- return "session_123";
- }
- if (key === "chat-playground-sessions") {
- return "invalid json";
- }
- return null;
- });
-
- const consoleSpy = jest.spyOn(console, "error").mockImplementation();
- const result = SessionUtils.loadCurrentSession();
-
- expect(result).toBeNull();
- expect(consoleSpy).toHaveBeenCalledWith(
- "Error parsing JSON:",
- expect.any(Error)
- );
-
- consoleSpy.mockRestore();
- });
- });
-
- describe("saveCurrentSession", () => {
- test("saves new session to localStorage", () => {
- localStorageMock.setItem.mockClear();
-
- SessionUtils.saveCurrentSession(mockSession);
-
- expect(localStorageMock.setItem).toHaveBeenCalledWith(
- "chat-playground-sessions",
- expect.stringContaining(mockSession.id)
- );
expect(localStorageMock.setItem).toHaveBeenCalledWith(
"chat-playground-current-session",
- mockSession.id
+ "test-session-id"
);
});
-
- test("updates existing session in localStorage", () => {
- localStorageMock.setItem.mockClear();
- localStorageMock.getItem.mockImplementation(key => {
- if (key === "chat-playground-sessions") {
- return JSON.stringify(mockSessions);
- }
- return null;
- });
-
- const updatedSession = {
- ...mockSession,
- name: "Updated Session Name",
- messages: [
- {
- id: "msg_1",
- role: "user" as const,
- content: "Test message",
- timestamp: Date.now(),
- },
- ],
- };
-
- SessionUtils.saveCurrentSession(updatedSession);
-
- expect(localStorageMock.setItem).toHaveBeenCalledWith(
- "chat-playground-sessions",
- expect.stringContaining("Updated Session Name")
- );
- expect(localStorageMock.setItem).toHaveBeenCalledWith(
- "chat-playground-current-session",
- updatedSession.id
- );
- });
-
- test("handles corrupted sessions data gracefully", () => {
- localStorageMock.setItem.mockClear();
- localStorageMock.getItem.mockReturnValue("invalid json");
- const consoleSpy = jest.spyOn(console, "error").mockImplementation();
-
- SessionUtils.saveCurrentSession(mockSession);
-
- expect(consoleSpy).toHaveBeenCalledWith(
- "Error parsing JSON:",
- expect.any(Error)
- );
- expect(localStorageMock.setItem).toHaveBeenCalledWith(
- "chat-playground-sessions",
- expect.stringContaining(mockSession.id)
- );
-
- consoleSpy.mockRestore();
- });
-
- test("updates timestamps correctly", () => {
- localStorageMock.setItem.mockClear();
- const originalNow = Date.now;
- const mockTime = 1710005000;
- Date.now = jest.fn(() => mockTime);
-
- SessionUtils.saveCurrentSession(mockSession);
-
- const savedSessionsCall = localStorageMock.setItem.mock.calls.find(
- call => call[0] === "chat-playground-sessions"
- );
- const savedSessions = JSON.parse(savedSessionsCall[1]);
-
- expect(savedSessions[0].updatedAt).toBe(mockTime);
-
- Date.now = originalNow;
- });
});
describe("createDefaultSession", () => {
- test("creates default session with default values", () => {
- const result = SessionUtils.createDefaultSession();
+ test("creates default session with agent ID", () => {
+ const result = SessionUtils.createDefaultSession("agent_123");
expect(result).toEqual(
expect.objectContaining({
name: "Default Session",
messages: [],
selectedModel: "",
- selectedVectorDb: "",
systemMessage: "You are a helpful assistant.",
+ agentId: "agent_123",
})
);
expect(result.id).toBeTruthy();
@@ -806,20 +311,13 @@ describe("SessionUtils", () => {
});
test("creates default session with inherited model", () => {
- const result = SessionUtils.createDefaultSession("inherited-model");
-
- expect(result.selectedModel).toBe("inherited-model");
- expect(result.selectedVectorDb).toBe("");
- });
-
- test("creates default session with inherited model and vector db", () => {
const result = SessionUtils.createDefaultSession(
- "inherited-model",
- "inherited-vector-db"
+ "agent_123",
+ "inherited-model"
);
expect(result.selectedModel).toBe("inherited-model");
- expect(result.selectedVectorDb).toBe("inherited-vector-db");
+ expect(result.agentId).toBe("agent_123");
});
test("creates unique session IDs", () => {
@@ -827,8 +325,8 @@ describe("SessionUtils", () => {
let mockTime = 1710005000;
Date.now = jest.fn(() => ++mockTime);
- const session1 = SessionUtils.createDefaultSession();
- const session2 = SessionUtils.createDefaultSession();
+ const session1 = SessionUtils.createDefaultSession("agent_123");
+ const session2 = SessionUtils.createDefaultSession("agent_123");
expect(session1.id).not.toBe(session2.id);
@@ -836,7 +334,7 @@ describe("SessionUtils", () => {
});
test("sets creation and update timestamps", () => {
- const result = SessionUtils.createDefaultSession();
+ const result = SessionUtils.createDefaultSession("agent_123");
expect(result.createdAt).toBeTruthy();
expect(result.updatedAt).toBeTruthy();
@@ -844,97 +342,4 @@ describe("SessionUtils", () => {
expect(typeof result.updatedAt).toBe("number");
});
});
-
- describe("deleteSession", () => {
- test("deletes session and returns deleted session info", () => {
- localStorageMock.setItem.mockClear();
- localStorageMock.getItem.mockImplementation(key => {
- if (key === "chat-playground-sessions") {
- return JSON.stringify(mockSessions);
- }
- if (key === "chat-playground-current-session") {
- return "utils_session_123";
- }
- return null;
- });
-
- const result = SessionUtils.deleteSession("utils_session_123");
-
- expect(result.deletedSession).toEqual(mockSession);
- expect(result.remainingSessions).toHaveLength(0);
- expect(localStorageMock.setItem).toHaveBeenCalledWith(
- "chat-playground-sessions",
- "[]"
- );
- });
-
- test("removes current session key when deleting current session", () => {
- localStorageMock.getItem.mockImplementation(key => {
- if (key === "chat-playground-sessions") {
- return JSON.stringify(mockSessions);
- }
- if (key === "chat-playground-current-session") {
- return "utils_session_123";
- }
- return null;
- });
-
- SessionUtils.deleteSession("utils_session_123");
-
- expect(localStorageMock.removeItem).toHaveBeenCalledWith(
- "chat-playground-current-session"
- );
- });
-
- test("does not remove current session key when deleting different session", () => {
- localStorageMock.getItem.mockImplementation(key => {
- if (key === "chat-playground-sessions") {
- return JSON.stringify([
- mockSession,
- { ...mockSession, id: "other_session" },
- ]);
- }
- if (key === "chat-playground-current-session") {
- return "utils_session_123";
- }
- return null;
- });
-
- SessionUtils.deleteSession("other_session");
-
- expect(localStorageMock.removeItem).not.toHaveBeenCalledWith(
- "chat-playground-current-session"
- );
- });
-
- test("returns null for non-existent session", () => {
- localStorageMock.getItem.mockImplementation(key => {
- if (key === "chat-playground-sessions") {
- return JSON.stringify(mockSessions);
- }
- return null;
- });
-
- const result = SessionUtils.deleteSession("non_existent");
-
- expect(result.deletedSession).toBeNull();
- expect(result.remainingSessions).toEqual(mockSessions);
- });
-
- test("handles corrupted sessions data gracefully", () => {
- localStorageMock.getItem.mockReturnValue("invalid json");
- const consoleSpy = jest.spyOn(console, "error").mockImplementation();
-
- const result = SessionUtils.deleteSession("any_session");
-
- expect(result.deletedSession).toBeNull();
- expect(result.remainingSessions).toEqual([]);
- expect(consoleSpy).toHaveBeenCalledWith(
- "Error parsing JSON:",
- expect.any(Error)
- );
-
- consoleSpy.mockRestore();
- });
- });
});
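
Note: the tests above replace the old localStorage fixtures with a stubbed auth client whose agents.session namespace mirrors the llama-stack-client calls SessionManager makes (list, create, retrieve, delete). A minimal sketch of a compatible jest mock, assuming the response shapes consumed in the component diff below; the real test file's setup may differ:

const mockClient = {
  agents: {
    session: {
      // list() resolves to { data: [...] } with session_id / session_name / started_at
      list: jest.fn().mockResolvedValue({
        data: [
          {
            session_id: "session_123",
            session_name: "Test Session",
            started_at: new Date().toISOString(),
          },
        ],
      }),
      // create() resolves to an object carrying the new session_id
      create: jest.fn().mockResolvedValue({ session_id: "session_789" }),
      // retrieve() resolves to a session whose turns array is flattened into messages
      retrieve: jest.fn().mockResolvedValue({ turns: [] }),
      delete: jest.fn().mockResolvedValue(undefined),
    },
  },
};

// jest allows out-of-scope references in module factories when the
// variable name starts with "mock".
jest.mock("@/hooks/use-auth-client", () => ({
  useAuthClient: () => mockClient,
}));
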
diff --git a/llama_stack/ui/components/chat-playground/session-manager.tsx b/llama_stack/ui/components/chat-playground/session-manager.tsx
index cd035df14..6b61eab34 100644
--- a/llama_stack/ui/components/chat-playground/session-manager.tsx
+++ b/llama_stack/ui/components/chat-playground/session-manager.tsx
@@ -1,6 +1,6 @@
"use client";
-import { useState, useEffect } from "react";
+import { useState, useEffect, useCallback } from "react";
import { Button } from "@/components/ui/button";
import {
Select,
@@ -13,14 +13,20 @@ import { Input } from "@/components/ui/input";
import { Card } from "@/components/ui/card";
import { Trash2 } from "lucide-react";
import type { Message } from "@/components/chat-playground/chat-message";
+import { useAuthClient } from "@/hooks/use-auth-client";
+import type {
+ Session,
+ SessionCreateParams,
+} from "llama-stack-client/resources/agents";
-interface ChatSession {
+export interface ChatSession {
id: string;
name: string;
messages: Message[];
selectedModel: string;
- selectedVectorDb: string;
systemMessage: string;
+ agentId: string;
+ session?: Session;
createdAt: number;
updatedAt: number;
}
@@ -29,9 +35,9 @@ interface SessionManagerProps {
currentSession: ChatSession | null;
onSessionChange: (session: ChatSession) => void;
onNewSession: () => void;
+ selectedAgentId: string;
}
-const SESSIONS_STORAGE_KEY = "chat-playground-sessions";
const CURRENT_SESSION_KEY = "chat-playground-current-session";
// ensures this only happens client side
@@ -63,16 +69,6 @@ const safeLocalStorage = {
},
};
-function safeJsonParse<T>(jsonString: string | null, fallback: T): T {
- if (!jsonString) return fallback;
- try {
- return JSON.parse(jsonString) as T;
- } catch (err) {
- console.error("Error parsing JSON:", err);
- return fallback;
- }
-}
-
const generateSessionId = (): string => {
return globalThis.crypto.randomUUID();
};
@@ -80,60 +76,202 @@ const generateSessionId = (): string => {
export function SessionManager({
currentSession,
onSessionChange,
+ selectedAgentId,
}: SessionManagerProps) {
const [sessions, setSessions] = useState<ChatSession[]>([]);
const [showCreateForm, setShowCreateForm] = useState(false);
const [newSessionName, setNewSessionName] = useState("");
+ const [loading, setLoading] = useState(false);
+ const client = useAuthClient();
+
+ const loadAgentSessions = useCallback(async () => {
+ if (!selectedAgentId) return;
+
+ setLoading(true);
+ try {
+ const response = await client.agents.session.list(selectedAgentId);
+ console.log("Sessions response:", response);
+
+ if (!response.data || !Array.isArray(response.data)) {
+ console.warn("Invalid sessions response, starting fresh");
+ setSessions([]);
+ return;
+ }
+
+ const agentSessions: ChatSession[] = response.data
+ .filter(sessionData => {
+ const isValid =
+ sessionData &&
+ typeof sessionData === "object" &&
+ sessionData.session_id &&
+ sessionData.session_name;
+ if (!isValid) {
+ console.warn("Filtering out invalid session:", sessionData);
+ }
+ return isValid;
+ })
+ .map(sessionData => ({
+ id: sessionData.session_id,
+ name: sessionData.session_name,
+ messages: [],
+ selectedModel: currentSession?.selectedModel || "",
+ systemMessage:
+ currentSession?.systemMessage || "You are a helpful assistant.",
+ agentId: selectedAgentId,
+ session: sessionData,
+ createdAt: sessionData.started_at
+ ? new Date(sessionData.started_at).getTime()
+ : Date.now(),
+ updatedAt: sessionData.started_at
+ ? new Date(sessionData.started_at).getTime()
+ : Date.now(),
+ }));
+ setSessions(agentSessions);
+ } catch (error) {
+ console.error("Error loading agent sessions:", error);
+ setSessions([]);
+ } finally {
+ setLoading(false);
+ }
+ }, [
+ selectedAgentId,
+ client,
+ currentSession?.selectedModel,
+ currentSession?.systemMessage,
+ ]);
useEffect(() => {
- const savedSessions = safeLocalStorage.getItem(SESSIONS_STORAGE_KEY);
- const sessions = safeJsonParse<ChatSession[]>(savedSessions, []);
- setSessions(sessions);
- }, []);
+ if (selectedAgentId) {
+ loadAgentSessions();
+ }
+ }, [selectedAgentId, loadAgentSessions]);
- const saveSessions = (updatedSessions: ChatSession[]) => {
- setSessions(updatedSessions);
- safeLocalStorage.setItem(
- SESSIONS_STORAGE_KEY,
- JSON.stringify(updatedSessions)
- );
- };
+ const createNewSession = async () => {
+ if (!selectedAgentId) return;
- const createNewSession = () => {
const sessionName =
newSessionName.trim() || `Session ${sessions.length + 1}`;
- const newSession: ChatSession = {
- id: generateSessionId(),
- name: sessionName,
- messages: [],
- selectedModel: currentSession?.selectedModel || "",
- selectedVectorDb: currentSession?.selectedVectorDb || "",
- systemMessage:
- currentSession?.systemMessage || "You are a helpful assistant.",
- createdAt: Date.now(),
- updatedAt: Date.now(),
- };
+ setLoading(true);
- const updatedSessions = [...sessions, newSession];
- saveSessions(updatedSessions);
+ try {
+ const response = await client.agents.session.create(selectedAgentId, {
+ session_name: sessionName,
+ } as SessionCreateParams);
- safeLocalStorage.setItem(CURRENT_SESSION_KEY, newSession.id);
- onSessionChange(newSession);
+ const newSession: ChatSession = {
+ id: response.session_id,
+ name: sessionName,
+ messages: [],
+ selectedModel: currentSession?.selectedModel || "",
+ systemMessage:
+ currentSession?.systemMessage || "You are a helpful assistant.",
+ agentId: selectedAgentId,
+ createdAt: Date.now(),
+ updatedAt: Date.now(),
+ };
- setNewSessionName("");
- setShowCreateForm(false);
- };
+ setSessions(prev => [...prev, newSession]);
+ SessionUtils.saveCurrentSessionId(newSession.id, selectedAgentId);
+ onSessionChange(newSession);
- const switchToSession = (sessionId: string) => {
- const session = sessions.find(s => s.id === sessionId);
- if (session) {
- safeLocalStorage.setItem(CURRENT_SESSION_KEY, sessionId);
- onSessionChange(session);
+ setNewSessionName("");
+ setShowCreateForm(false);
+ } catch (error) {
+ console.error("Error creating session:", error);
+ } finally {
+ setLoading(false);
}
};
- const deleteSession = (sessionId: string) => {
- if (sessions.length <= 1) {
+ const loadSessionMessages = useCallback(
+ async (agentId: string, sessionId: string): Promise<Message[]> => {
+ try {
+ const session = await client.agents.session.retrieve(
+ agentId,
+ sessionId
+ );
+
+ if (!session || !session.turns || !Array.isArray(session.turns)) {
+ return [];
+ }
+
+ const messages: Message[] = [];
+ for (const turn of session.turns) {
+ // Add user messages from input_messages
+ if (turn.input_messages && Array.isArray(turn.input_messages)) {
+ for (const input of turn.input_messages) {
+ if (input.role === "user" && input.content) {
+ messages.push({
+ id: `${turn.turn_id}-user-${messages.length}`,
+ role: "user",
+ content:
+ typeof input.content === "string"
+ ? input.content
+ : JSON.stringify(input.content),
+ createdAt: new Date(turn.started_at || Date.now()),
+ });
+ }
+ }
+ }
+
+ // Add assistant message from output_message
+ if (turn.output_message && turn.output_message.content) {
+ messages.push({
+ id: `${turn.turn_id}-assistant-${messages.length}`,
+ role: "assistant",
+ content:
+ typeof turn.output_message.content === "string"
+ ? turn.output_message.content
+ : JSON.stringify(turn.output_message.content),
+ createdAt: new Date(
+ turn.completed_at || turn.started_at || Date.now()
+ ),
+ });
+ }
+ }
+
+ return messages;
+ } catch (error) {
+ console.error("Error loading session messages:", error);
+ return [];
+ }
+ },
+ [client]
+ );
+
+ const switchToSession = useCallback(
+ async (sessionId: string) => {
+ const session = sessions.find(s => s.id === sessionId);
+ if (session) {
+ setLoading(true);
+ try {
+ // Load messages for this session
+ const messages = await loadSessionMessages(
+ selectedAgentId,
+ sessionId
+ );
+ const sessionWithMessages = {
+ ...session,
+ messages,
+ };
+
+ SessionUtils.saveCurrentSessionId(sessionId, selectedAgentId);
+ onSessionChange(sessionWithMessages);
+ } catch (error) {
+ console.error("Error switching to session:", error);
+ // Fallback to session without messages
+ SessionUtils.saveCurrentSessionId(sessionId, selectedAgentId);
+ onSessionChange(session);
+ } finally {
+ setLoading(false);
+ }
+ }
+ },
+ [sessions, selectedAgentId, loadSessionMessages, onSessionChange]
+ );
+
+ const deleteSession = async (sessionId: string) => {
+ if (sessions.length <= 1 || !selectedAgentId) {
return;
}
@@ -142,21 +280,30 @@ export function SessionManager({
"Are you sure you want to delete this session? This action cannot be undone."
)
) {
- const updatedSessions = sessions.filter(s => s.id !== sessionId);
- saveSessions(updatedSessions);
+ setLoading(true);
+ try {
+ await client.agents.session.delete(selectedAgentId, sessionId);
- if (currentSession?.id === sessionId) {
- const newCurrentSession = updatedSessions[0] || null;
- if (newCurrentSession) {
- safeLocalStorage.setItem(CURRENT_SESSION_KEY, newCurrentSession.id);
- onSessionChange(newCurrentSession);
- } else {
- safeLocalStorage.removeItem(CURRENT_SESSION_KEY);
- const defaultSession = SessionUtils.createDefaultSession();
- saveSessions([defaultSession]);
- safeLocalStorage.setItem(CURRENT_SESSION_KEY, defaultSession.id);
- onSessionChange(defaultSession);
+ const updatedSessions = sessions.filter(s => s.id !== sessionId);
+ setSessions(updatedSessions);
+
+ if (currentSession?.id === sessionId) {
+ const newCurrentSession = updatedSessions[0] || null;
+ if (newCurrentSession) {
+ SessionUtils.saveCurrentSessionId(
+ newCurrentSession.id,
+ selectedAgentId
+ );
+ onSessionChange(newCurrentSession);
+ } else {
+ SessionUtils.clearCurrentSession(selectedAgentId);
+ onNewSession();
+ }
}
+ } catch (error) {
+ console.error("Error deleting session:", error);
+ } finally {
+ setLoading(false);
}
}
};
@@ -172,16 +319,16 @@ export function SessionManager({
updatedSessions.push(currentSession);
}
- safeLocalStorage.setItem(
- SESSIONS_STORAGE_KEY,
- JSON.stringify(updatedSessions)
- );
-
return updatedSessions;
});
}
}, [currentSession]);
+ // Don't render if no agent is selected
+ if (!selectedAgentId) {
+ return null;
+ }
+
return (
@@ -205,6 +352,7 @@ export function SessionManager({
<Button
onClick={() => setShowCreateForm(true)}
variant="outline"
size="sm"
+ disabled={loading || !selectedAgentId}
>
+ New
@@ -241,8 +389,12 @@ export function SessionManager({
/>
- <Button onClick={createNewSession} size="sm">
- Create
- </Button>
+ <Button onClick={createNewSession} size="sm" disabled={loading}>
+ {loading ? "Creating..." : "Create"}
+ </Button>

export const SessionUtils = {
- loadCurrentSession: (): ChatSession | null => {
- const currentSessionId = safeLocalStorage.getItem(CURRENT_SESSION_KEY);
- const savedSessions = safeLocalStorage.getItem(SESSIONS_STORAGE_KEY);
-
- if (currentSessionId && savedSessions) {
- const sessions = safeJsonParse<ChatSession[]>(savedSessions, []);
- return sessions.find(s => s.id === currentSessionId) || null;
- }
- return null;
+ loadCurrentSessionId: (agentId?: string): string | null => {
+ const key = agentId
+ ? `${CURRENT_SESSION_KEY}-${agentId}`
+ : CURRENT_SESSION_KEY;
+ return safeLocalStorage.getItem(key);
},
- saveCurrentSession: (session: ChatSession) => {
- const savedSessions = safeLocalStorage.getItem(SESSIONS_STORAGE_KEY);
- const sessions = safeJsonParse<ChatSession[]>(savedSessions, []);
-
- const existingIndex = sessions.findIndex(s => s.id === session.id);
- if (existingIndex >= 0) {
- sessions[existingIndex] = { ...session, updatedAt: Date.now() };
- } else {
- sessions.push({
- ...session,
- createdAt: Date.now(),
- updatedAt: Date.now(),
- });
- }
-
- safeLocalStorage.setItem(SESSIONS_STORAGE_KEY, JSON.stringify(sessions));
- safeLocalStorage.setItem(CURRENT_SESSION_KEY, session.id);
+ saveCurrentSessionId: (sessionId: string, agentId?: string) => {
+ const key = agentId
+ ? `${CURRENT_SESSION_KEY}-${agentId}`
+ : CURRENT_SESSION_KEY;
+ safeLocalStorage.setItem(key, sessionId);
},
createDefaultSession: (
- inheritModel?: string,
- inheritVectorDb?: string
+ agentId: string,
+ inheritModel?: string
): ChatSession => ({
id: generateSessionId(),
name: "Default Session",
messages: [],
selectedModel: inheritModel || "",
- selectedVectorDb: inheritVectorDb || "",
systemMessage: "You are a helpful assistant.",
+ agentId,
createdAt: Date.now(),
updatedAt: Date.now(),
}),
- deleteSession: (
- sessionId: string
- ): {
- deletedSession: ChatSession | null;
- remainingSessions: ChatSession[];
- } => {
- const savedSessions = safeLocalStorage.getItem(SESSIONS_STORAGE_KEY);
- const sessions = safeJsonParse<ChatSession[]>(savedSessions, []);
+ clearCurrentSession: (agentId?: string) => {
+ const key = agentId
+ ? `${CURRENT_SESSION_KEY}-${agentId}`
+ : CURRENT_SESSION_KEY;
+ safeLocalStorage.removeItem(key);
+ },
- const sessionToDelete = sessions.find(s => s.id === sessionId);
- const remainingSessions = sessions.filter(s => s.id !== sessionId);
+ loadCurrentAgentId: (): string | null => {
+ return safeLocalStorage.getItem("chat-playground-current-agent");
+ },
+ saveCurrentAgentId: (agentId: string) => {
+ safeLocalStorage.setItem("chat-playground-current-agent", agentId);
+ },
+
+ // Comprehensive session caching
+ saveSessionData: (agentId: string, sessionData: ChatSession) => {
+ const key = `chat-playground-session-data-${agentId}-${sessionData.id}`;
safeLocalStorage.setItem(
- SESSIONS_STORAGE_KEY,
- JSON.stringify(remainingSessions)
+ key,
+ JSON.stringify({
+ ...sessionData,
+ cachedAt: Date.now(),
+ })
);
+ },
- const currentSessionId = safeLocalStorage.getItem(CURRENT_SESSION_KEY);
- if (currentSessionId === sessionId) {
- safeLocalStorage.removeItem(CURRENT_SESSION_KEY);
+ loadSessionData: (agentId: string, sessionId: string): ChatSession | null => {
+ const key = `chat-playground-session-data-${agentId}-${sessionId}`;
+ const cached = safeLocalStorage.getItem(key);
+ if (!cached) return null;
+
+ try {
+ const data = JSON.parse(cached);
+ // Check if cache is fresh (less than 1 hour old)
+ const cacheAge = Date.now() - (data.cachedAt || 0);
+ if (cacheAge > 60 * 60 * 1000) {
+ safeLocalStorage.removeItem(key);
+ return null;
+ }
+
+ // Convert date strings back to Date objects
+ return {
+ ...data,
+ messages: data.messages.map(
+ (msg: { createdAt: string; [key: string]: unknown }) => ({
+ ...msg,
+ createdAt: new Date(msg.createdAt),
+ })
+ ),
+ };
+ } catch (error) {
+ console.error("Error parsing cached session data:", error);
+ safeLocalStorage.removeItem(key);
+ return null;
}
+ },
- return { deletedSession: sessionToDelete || null, remainingSessions };
+ // Agent config caching
+ saveAgentConfig: (
+ agentId: string,
+ config: {
+ toolgroups?: Array<
+ string | { name: string; args: Record<string, unknown> }
+ >;
+ [key: string]: unknown;
+ }
+ ) => {
+ const key = `chat-playground-agent-config-${agentId}`;
+ safeLocalStorage.setItem(
+ key,
+ JSON.stringify({
+ config,
+ cachedAt: Date.now(),
+ })
+ );
+ },
+
+ loadAgentConfig: (
+ agentId: string
+ ): {
+ toolgroups?: Array<
+ string | { name: string; args: Record<string, unknown> }
+ >;
+ [key: string]: unknown;
+ } | null => {
+ const key = `chat-playground-agent-config-${agentId}`;
+ const cached = safeLocalStorage.getItem(key);
+ if (!cached) return null;
+
+ try {
+ const data = JSON.parse(cached);
+ // Check if cache is fresh (less than 30 minutes old)
+ const cacheAge = Date.now() - (data.cachedAt || 0);
+ if (cacheAge > 30 * 60 * 1000) {
+ safeLocalStorage.removeItem(key);
+ return null;
+ }
+ return data.config;
+ } catch (error) {
+ console.error("Error parsing cached agent config:", error);
+ safeLocalStorage.removeItem(key);
+ return null;
+ }
+ },
+
+ // Clear all cached data for an agent
+ clearAgentCache: (agentId: string) => {
+ const keys = Object.keys(localStorage).filter(
+ key =>
+ key.includes(`chat-playground-session-data-${agentId}`) ||
+ key.includes(`chat-playground-agent-config-${agentId}`)
+ );
+ keys.forEach(key => safeLocalStorage.removeItem(key));
},
};
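
Net effect of the SessionUtils rewrite above: session contents now live server-side, and localStorage keeps only per-agent pointers plus TTL-bounded caches (one hour for session data, thirty minutes for agent configs). A rough sketch of the restore path this enables on page load, assuming SessionUtils and ChatSession are importable from this module; the actual chat-playground wiring may differ:

import {
  SessionUtils,
  type ChatSession,
} from "@/components/chat-playground/session-manager";

// Try to restore the last agent/session pair without a network round trip.
function restoreFromCache(
  onSessionChange: (session: ChatSession) => void
): boolean {
  const agentId = SessionUtils.loadCurrentAgentId();
  if (!agentId) return false;

  const sessionId = SessionUtils.loadCurrentSessionId(agentId);
  if (!sessionId) return false;

  // loadSessionData returns null once the cache is older than one hour;
  // callers should then fall back to client.agents.session.retrieve.
  const cached = SessionUtils.loadSessionData(agentId, sessionId);
  if (!cached) return false;

  onSessionChange(cached);
  return true;
}
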
diff --git a/llama_stack/ui/components/chat-playground/typing-indicator.tsx b/llama_stack/ui/components/chat-playground/typing-indicator.tsx
index 8950c066b..3b5a560b7 100644
--- a/llama_stack/ui/components/chat-playground/typing-indicator.tsx
+++ b/llama_stack/ui/components/chat-playground/typing-indicator.tsx
@@ -5,9 +5,9 @@ export function TypingIndicator() {
diff --git a/llama_stack/ui/components/layout/app-sidebar.tsx b/llama_stack/ui/components/layout/app-sidebar.tsx
index bee3d6a70..373f0c5ae 100644
--- a/llama_stack/ui/components/layout/app-sidebar.tsx
+++ b/llama_stack/ui/components/layout/app-sidebar.tsx
@@ -11,6 +11,7 @@ import {
} from "lucide-react";
import Link from "next/link";
import { usePathname } from "next/navigation";
+import Image from "next/image";
import { cn } from "@/lib/utils";
import {
@@ -110,7 +111,16 @@ export function AppSidebar() {
return (
- Llama Stack
+ <div className="flex items-center gap-2">
+ <Image src="/logo.webp" alt="Llama Stack" width={24} height={24} />
+ <span>Llama Stack</span>
+ </div>
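
The sidebar hunk above swaps the plain wordmark for the new /logo.webp asset rendered through next/image. A minimal sketch of that header; the class names and 24px dimensions are illustrative (next/image requires explicit width/height unless the fill prop is used):

import Image from "next/image";

export function SidebarBrand() {
  return (
    <div className="flex items-center gap-2">
      <Image src="/logo.webp" alt="Llama Stack" width={24} height={24} />
      <span className="text-lg font-semibold">Llama Stack</span>
    </div>
  );
}
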
diff --git a/llama_stack/ui/package-lock.json b/llama_stack/ui/package-lock.json
index bc6263732..2df1cceb3 100644
--- a/llama_stack/ui/package-lock.json
+++ b/llama_stack/ui/package-lock.json
@@ -18,7 +18,7 @@
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
"framer-motion": "^11.18.2",
- "llama-stack-client": "0.2.17",
+ "llama-stack-client": "^0.2.18",
"lucide-react": "^0.510.0",
"next": "15.3.3",
"next-auth": "^4.24.11",
@@ -9926,9 +9926,9 @@
"license": "MIT"
},
"node_modules/llama-stack-client": {
- "version": "0.2.17",
- "resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.2.17.tgz",
- "integrity": "sha512-+/fEO8M7XPiVLjhH7ge18i1ijKp4+h3dOkE0C8g2cvGuDUtDYIJlf8NSyr9OMByjiWpCibWU7VOKL50LwGLS3Q==",
+ "version": "0.2.18",
+ "resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.2.18.tgz",
+ "integrity": "sha512-k+xQOz/TIU0cINP4Aih8q6xs7f/6qs0fLDMXTTKQr5C0F1jtCjRiwsas7bTsDfpKfYhg/7Xy/wPw/uZgi6aIVg==",
"license": "MIT",
"dependencies": {
"@types/node": "^18.11.18",
diff --git a/llama_stack/ui/package.json b/llama_stack/ui/package.json
index fd6f6fbb7..226b06f59 100644
--- a/llama_stack/ui/package.json
+++ b/llama_stack/ui/package.json
@@ -23,7 +23,7 @@
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
"framer-motion": "^11.18.2",
- "llama-stack-client": "^0.2.17",
+ "llama-stack-client": "^0.2.18",
"lucide-react": "^0.510.0",
"next": "15.3.3",
"next-auth": "^4.24.11",
diff --git a/llama_stack/ui/public/favicon.ico b/llama_stack/ui/public/favicon.ico
new file mode 100644
index 000000000..553368b18
Binary files /dev/null and b/llama_stack/ui/public/favicon.ico differ
diff --git a/llama_stack/ui/public/logo.webp b/llama_stack/ui/public/logo.webp
new file mode 100644
index 000000000..28caa6edd
Binary files /dev/null and b/llama_stack/ui/public/logo.webp differ
diff --git a/pyproject.toml b/pyproject.toml
index f02c02c41..0cdfc6a37 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,7 +7,7 @@ required-version = ">=0.7.0"
[project]
name = "llama_stack"
-version = "0.2.17"
+version = "0.2.18"
authors = [{ name = "Meta Llama", email = "llama-oss@meta.com" }]
description = "Llama Stack"
readme = "README.md"
@@ -31,7 +31,7 @@ dependencies = [
"huggingface-hub>=0.34.0,<1.0",
"jinja2>=3.1.6",
"jsonschema",
- "llama-stack-client>=0.2.17",
+ "llama-stack-client>=0.2.18",
"llama-api-client>=0.1.2",
"openai>=1.99.6,<1.100.0",
"prompt-toolkit",
@@ -56,7 +56,7 @@ dependencies = [
ui = [
"streamlit",
"pandas",
- "llama-stack-client>=0.2.17",
+ "llama-stack-client>=0.2.18",
"streamlit-option-menu",
]
@@ -93,6 +93,7 @@ unit = [
"blobfile",
"faiss-cpu",
"pymilvus>=2.5.12",
+ "milvus-lite>=2.5.0",
"litellm",
"together",
"coverage",
@@ -118,6 +119,7 @@ test = [
"sqlalchemy[asyncio]>=2.0.41",
"requests",
"pymilvus>=2.5.12",
+ "milvus-lite>=2.5.0",
"weaviate-client>=4.16.4",
]
docs = [
diff --git a/tests/integration/post_training/test_post_training.py b/tests/integration/post_training/test_post_training.py
index f9c797593..b5be71c7c 100644
--- a/tests/integration/post_training/test_post_training.py
+++ b/tests/integration/post_training/test_post_training.py
@@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-import logging
import sys
import time
import uuid
@@ -19,10 +18,10 @@ from llama_stack.apis.post_training import (
LoraFinetuningConfig,
TrainingConfig,
)
+from llama_stack.log import get_logger
# Configure logging
-logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s", force=True)
-logger = logging.getLogger(__name__)
+logger = get_logger(name=__name__, category="post_training")
skip_because_resource_intensive = pytest.mark.skip(
diff --git a/tests/integration/vector_io/test_openai_vector_stores.py b/tests/integration/vector_io/test_openai_vector_stores.py
index 7ccca9077..82868164f 100644
--- a/tests/integration/vector_io/test_openai_vector_stores.py
+++ b/tests/integration/vector_io/test_openai_vector_stores.py
@@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-import logging
import time
from io import BytesIO
@@ -14,8 +13,9 @@ from openai import BadRequestError as OpenAIBadRequestError
from llama_stack.apis.vector_io import Chunk
from llama_stack.core.library_client import LlamaStackAsLibraryClient
+from llama_stack.log import get_logger
-logger = logging.getLogger(__name__)
+logger = get_logger(name=__name__, category="vector_io")
def skip_if_provider_doesnt_support_openai_vector_stores(client_with_models):
@@ -56,6 +56,7 @@ def skip_if_provider_doesnt_support_openai_vector_stores_search(client_with_mode
"keyword": [
"inline::sqlite-vec",
"remote::milvus",
+ "inline::milvus",
],
"hybrid": [
"inline::sqlite-vec",
diff --git a/tests/unit/providers/agents/meta_reference/test_response_conversion_utils.py b/tests/unit/providers/agents/meta_reference/test_response_conversion_utils.py
index b568ce135..1b9657484 100644
--- a/tests/unit/providers/agents/meta_reference/test_response_conversion_utils.py
+++ b/tests/unit/providers/agents/meta_reference/test_response_conversion_utils.py
@@ -45,7 +45,6 @@ from llama_stack.providers.inline.agents.meta_reference.responses.utils import (
class TestConvertChatChoiceToResponseMessage:
- @pytest.mark.asyncio
async def test_convert_string_content(self):
choice = OpenAIChoice(
message=OpenAIAssistantMessageParam(content="Test message"),
@@ -61,7 +60,6 @@ class TestConvertChatChoiceToResponseMessage:
assert isinstance(result.content[0], OpenAIResponseOutputMessageContentOutputText)
assert result.content[0].text == "Test message"
- @pytest.mark.asyncio
async def test_convert_text_param_content(self):
choice = OpenAIChoice(
message=OpenAIAssistantMessageParam(
@@ -78,12 +76,10 @@ class TestConvertChatChoiceToResponseMessage:
class TestConvertResponseContentToChatContent:
- @pytest.mark.asyncio
async def test_convert_string_content(self):
result = await convert_response_content_to_chat_content("Simple string")
assert result == "Simple string"
- @pytest.mark.asyncio
async def test_convert_text_content_parts(self):
content = [
OpenAIResponseInputMessageContentText(text="First part"),
@@ -98,7 +94,6 @@ class TestConvertResponseContentToChatContent:
assert isinstance(result[1], OpenAIChatCompletionContentPartTextParam)
assert result[1].text == "Second part"
- @pytest.mark.asyncio
async def test_convert_image_content(self):
content = [OpenAIResponseInputMessageContentImage(image_url="https://example.com/image.jpg", detail="high")]
@@ -111,7 +106,6 @@ class TestConvertResponseContentToChatContent:
class TestConvertResponseInputToChatMessages:
- @pytest.mark.asyncio
async def test_convert_string_input(self):
result = await convert_response_input_to_chat_messages("User message")
@@ -119,7 +113,6 @@ class TestConvertResponseInputToChatMessages:
assert isinstance(result[0], OpenAIUserMessageParam)
assert result[0].content == "User message"
- @pytest.mark.asyncio
async def test_convert_function_tool_call_output(self):
input_items = [
OpenAIResponseInputFunctionToolCallOutput(
@@ -135,7 +128,6 @@ class TestConvertResponseInputToChatMessages:
assert result[0].content == "Tool output"
assert result[0].tool_call_id == "call_123"
- @pytest.mark.asyncio
async def test_convert_function_tool_call(self):
input_items = [
OpenAIResponseOutputMessageFunctionToolCall(
@@ -154,7 +146,6 @@ class TestConvertResponseInputToChatMessages:
assert result[0].tool_calls[0].function.name == "test_function"
assert result[0].tool_calls[0].function.arguments == '{"param": "value"}'
- @pytest.mark.asyncio
async def test_convert_response_message(self):
input_items = [
OpenAIResponseMessage(
@@ -173,7 +164,6 @@ class TestConvertResponseInputToChatMessages:
class TestConvertResponseTextToChatResponseFormat:
- @pytest.mark.asyncio
async def test_convert_text_format(self):
text = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text"))
result = await convert_response_text_to_chat_response_format(text)
@@ -181,14 +171,12 @@ class TestConvertResponseTextToChatResponseFormat:
assert isinstance(result, OpenAIResponseFormatText)
assert result.type == "text"
- @pytest.mark.asyncio
async def test_convert_json_object_format(self):
text = OpenAIResponseText(format={"type": "json_object"})
result = await convert_response_text_to_chat_response_format(text)
assert isinstance(result, OpenAIResponseFormatJSONObject)
- @pytest.mark.asyncio
async def test_convert_json_schema_format(self):
schema_def = {"type": "object", "properties": {"test": {"type": "string"}}}
text = OpenAIResponseText(
@@ -204,7 +192,6 @@ class TestConvertResponseTextToChatResponseFormat:
assert result.json_schema["name"] == "test_schema"
assert result.json_schema["schema"] == schema_def
- @pytest.mark.asyncio
async def test_default_text_format(self):
text = OpenAIResponseText()
result = await convert_response_text_to_chat_response_format(text)
@@ -214,27 +201,22 @@ class TestConvertResponseTextToChatResponseFormat:
class TestGetMessageTypeByRole:
- @pytest.mark.asyncio
async def test_user_role(self):
result = await get_message_type_by_role("user")
assert result == OpenAIUserMessageParam
- @pytest.mark.asyncio
async def test_system_role(self):
result = await get_message_type_by_role("system")
assert result == OpenAISystemMessageParam
- @pytest.mark.asyncio
async def test_assistant_role(self):
result = await get_message_type_by_role("assistant")
assert result == OpenAIAssistantMessageParam
- @pytest.mark.asyncio
async def test_developer_role(self):
result = await get_message_type_by_role("developer")
assert result == OpenAIDeveloperMessageParam
- @pytest.mark.asyncio
async def test_unknown_role(self):
result = await get_message_type_by_role("unknown")
assert result is None
diff --git a/tests/unit/providers/inference/test_remote_vllm.py b/tests/unit/providers/inference/test_remote_vllm.py
index 5c2ad03ab..ce0e930b1 100644
--- a/tests/unit/providers/inference/test_remote_vllm.py
+++ b/tests/unit/providers/inference/test_remote_vllm.py
@@ -6,7 +6,7 @@
import asyncio
import json
-import logging
+import logging # allow-direct-logging
import threading
import time
from http.server import BaseHTTPRequestHandler, HTTPServer
diff --git a/uv.lock b/uv.lock
index 3e3bf7e24..635b2bdfe 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1719,7 +1719,7 @@ wheels = [
[[package]]
name = "llama-stack"
-version = "0.2.17"
+version = "0.2.18"
source = { editable = "." }
dependencies = [
{ name = "aiohttp" },
@@ -1809,6 +1809,7 @@ test = [
{ name = "chardet" },
{ name = "datasets" },
{ name = "mcp" },
+ { name = "milvus-lite" },
{ name = "openai" },
{ name = "pymilvus" },
{ name = "pypdf" },
@@ -1831,6 +1832,7 @@ unit = [
{ name = "faiss-cpu" },
{ name = "litellm" },
{ name = "mcp" },
+ { name = "milvus-lite" },
{ name = "ollama" },
{ name = "openai" },
{ name = "pymilvus" },
@@ -1854,8 +1856,8 @@ requires-dist = [
{ name = "jinja2", specifier = ">=3.1.6" },
{ name = "jsonschema" },
{ name = "llama-api-client", specifier = ">=0.1.2" },
- { name = "llama-stack-client", specifier = ">=0.2.17" },
- { name = "llama-stack-client", marker = "extra == 'ui'", specifier = ">=0.2.17" },
+ { name = "llama-stack-client", specifier = ">=0.2.18" },
+ { name = "llama-stack-client", marker = "extra == 'ui'", specifier = ">=0.2.18" },
{ name = "openai", specifier = ">=1.99.6,<1.100.0" },
{ name = "opentelemetry-exporter-otlp-proto-http", specifier = ">=1.30.0" },
{ name = "opentelemetry-sdk", specifier = ">=1.30.0" },
@@ -1925,6 +1927,7 @@ test = [
{ name = "chardet" },
{ name = "datasets" },
{ name = "mcp" },
+ { name = "milvus-lite", specifier = ">=2.5.0" },
{ name = "openai" },
{ name = "pymilvus", specifier = ">=2.5.12" },
{ name = "pypdf" },
@@ -1946,6 +1949,7 @@ unit = [
{ name = "faiss-cpu" },
{ name = "litellm" },
{ name = "mcp" },
+ { name = "milvus-lite", specifier = ">=2.5.0" },
{ name = "ollama" },
{ name = "openai" },
{ name = "pymilvus", specifier = ">=2.5.12" },
@@ -1959,7 +1963,7 @@ unit = [
[[package]]
name = "llama-stack-client"
-version = "0.2.17"
+version = "0.2.18"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "anyio" },
@@ -1978,9 +1982,9 @@ dependencies = [
{ name = "tqdm" },
{ name = "typing-extensions" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/c5/2a/bb2949d6a5c494d21da0c185d426e25eaa8016f8287b689249afc6c96fb5/llama_stack_client-0.2.17.tar.gz", hash = "sha256:1fe2070133c6356761e394fa346045e9b6b567d4c63157b9bc6be89b9a6e7a41", size = 257636, upload-time = "2025-08-05T01:42:55.911Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/69/da/5e5a745495f8a2b8ef24fc4d01fe9031aa2277c36447cb22192ec8c8cc1e/llama_stack_client-0.2.18.tar.gz", hash = "sha256:860c885c9e549445178ac55cc9422e6e2a91215ac7aff5aaccfb42f3ce07e79e", size = 277284, upload-time = "2025-08-19T22:12:09.106Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/81/fc/5eccc86b83c5ced3a3bca071d250a86ccafa4ff17546cf781deb7758ab74/llama_stack_client-0.2.17-py3-none-any.whl", hash = "sha256:336c32f8688700ff64717b8109f405dc87a990fbe310c2027ac9ed6d39d67d16", size = 350329, upload-time = "2025-08-05T01:42:54.381Z" },
+ { url = "https://files.pythonhosted.org/packages/0a/e4/e97f8fdd8a07aa1efc7f7e37b5657d84357b664bf70dd1885a437edc0699/llama_stack_client-0.2.18-py3-none-any.whl", hash = "sha256:90f827d5476f7fc15fd993f1863af6a6e72bd064646bf6a99435eb43a1327f70", size = 367586, upload-time = "2025-08-19T22:12:07.899Z" },
]
[[package]]