Mirror of https://github.com/meta-llama/llama-stack.git

Commit 73275f07b7: Merge branch 'main' into nvidia-e2e-notebook
123 changed files with 6946 additions and 2220 deletions
@@ -7,16 +7,16 @@
 import importlib.resources
 import logging
 from pathlib import Path
-from typing import Dict, List

 from pydantic import BaseModel
 from termcolor import cprint

-from llama_stack.distribution.datatypes import BuildConfig, Provider
+from llama_stack.distribution.datatypes import BuildConfig
 from llama_stack.distribution.distribution import get_provider_registry
 from llama_stack.distribution.utils.exec import run_command
 from llama_stack.distribution.utils.image_types import LlamaStackImageType
 from llama_stack.providers.datatypes import Api
+from llama_stack.templates.template import DistributionTemplate

 log = logging.getLogger(__name__)
@@ -37,19 +37,24 @@ class ApiInput(BaseModel):


 def get_provider_dependencies(
-    config_providers: Dict[str, List[Provider]],
+    config: BuildConfig | DistributionTemplate,
 ) -> tuple[list[str], list[str]]:
     """Get normal and special dependencies from provider configuration."""
-    all_providers = get_provider_registry()
+    # Extract providers based on config type
+    if isinstance(config, DistributionTemplate):
+        providers = config.providers
+    elif isinstance(config, BuildConfig):
+        providers = config.distribution_spec.providers
     deps = []
+    registry = get_provider_registry(config)

-    for api_str, provider_or_providers in config_providers.items():
-        providers_for_api = all_providers[Api(api_str)]
+    for api_str, provider_or_providers in providers.items():
+        providers_for_api = registry[Api(api_str)]

         providers = provider_or_providers if isinstance(provider_or_providers, list) else [provider_or_providers]

         for provider in providers:
             # Providers from BuildConfig and RunConfig are subtly different – not great
             provider_type = provider if isinstance(provider, str) else provider.provider_type

             if provider_type not in providers_for_api:
@@ -71,8 +76,8 @@ def get_provider_dependencies(
     return list(set(normal_deps)), list(set(special_deps))


-def print_pip_install_help(providers: Dict[str, List[Provider]]):
-    normal_deps, special_deps = get_provider_dependencies(providers)
+def print_pip_install_help(config: BuildConfig):
+    normal_deps, special_deps = get_provider_dependencies(config)

     cprint(
         f"Please install needed dependencies using the following commands:\n\nuv pip install {' '.join(normal_deps)}",
@@ -88,10 +93,11 @@ def build_image(
     build_file_path: Path,
     image_name: str,
     template_or_config: str,
+    run_config: str | None = None,
 ):
     container_base = build_config.distribution_spec.container_image or "python:3.10-slim"

-    normal_deps, special_deps = get_provider_dependencies(build_config.distribution_spec.providers)
+    normal_deps, special_deps = get_provider_dependencies(build_config)
     normal_deps += SERVER_DEPENDENCIES

     if build_config.image_type == LlamaStackImageType.CONTAINER.value:
@@ -103,6 +109,11 @@ def build_image(
             container_base,
             " ".join(normal_deps),
         ]
+
+        # When building from a config file (not a template), include the run config path in the
+        # build arguments
+        if run_config is not None:
+            args.append(run_config)
     elif build_config.image_type == LlamaStackImageType.CONDA.value:
         script = str(importlib.resources.files("llama_stack") / "distribution/build_conda_env.sh")
         args = [
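For illustration, a minimal sketch of how the reworked dependency helper above might be driven from a build configuration. The module path and the build file name are assumptions for this example, not part of the diff:

    import yaml

    # Assumed import location for the helpers changed above.
    from llama_stack.distribution.build import get_provider_dependencies, print_pip_install_help
    from llama_stack.distribution.datatypes import BuildConfig

    # Hypothetical build file; any BuildConfig-shaped YAML works the same way.
    with open("my-build.yaml") as f:
        build_config = BuildConfig(**yaml.safe_load(f))

    normal_deps, special_deps = get_provider_dependencies(build_config)
    print_pip_install_help(build_config)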
@@ -19,12 +19,16 @@ UV_HTTP_TIMEOUT=${UV_HTTP_TIMEOUT:-500}
 # mounting is not supported by docker buildx, so we use COPY instead
 USE_COPY_NOT_MOUNT=${USE_COPY_NOT_MOUNT:-}

+# Path to the run.yaml file in the container
+RUN_CONFIG_PATH=/app/run.yaml
+
+BUILD_CONTEXT_DIR=$(pwd)
+
 if [ "$#" -lt 4 ]; then
   # This only works for templates
-  echo "Usage: $0 <template_or_config> <image_name> <container_base> <pip_dependencies> [<special_pip_deps>]" >&2
+  echo "Usage: $0 <template_or_config> <image_name> <container_base> <pip_dependencies> [<run_config>] [<special_pip_deps>]" >&2
   exit 1
 fi

 set -euo pipefail

 template_or_config="$1"
@@ -35,8 +39,27 @@ container_base="$1"
 shift
 pip_dependencies="$1"
 shift
-special_pip_deps="${1:-}"
+
+# Handle optional arguments
+run_config=""
+special_pip_deps=""
+
+# Check if there are more arguments
+# The logic is becoming cumbersome; we should refactor it if we can do better
+if [ $# -gt 0 ]; then
+  # Check if the argument ends with .yaml
+  if [[ "$1" == *.yaml ]]; then
+    run_config="$1"
+    shift
+    # If there's another argument after .yaml, it must be special_pip_deps
+    if [ $# -gt 0 ]; then
+      special_pip_deps="$1"
+    fi
+  else
+    # If it's not .yaml, it must be special_pip_deps
+    special_pip_deps="$1"
+  fi
+fi

 # Define color codes
 RED='\033[0;31m'
@@ -72,9 +95,13 @@ if [[ $container_base == *"registry.access.redhat.com/ubi9"* ]]; then
 FROM $container_base
 WORKDIR /app

-RUN dnf -y update && dnf install -y iputils net-tools wget \
+# We install the Python 3.11 dev headers and build tools so that any
+# C‑extension wheels (e.g. polyleven, faiss‑cpu) can compile successfully.
+
+RUN dnf -y update && dnf install -y iputils git net-tools wget \
     vim-minimal python3.11 python3.11-pip python3.11-wheel \
-    python3.11-setuptools && ln -s /bin/pip3.11 /bin/pip && ln -s /bin/python3.11 /bin/python && dnf clean all
+    python3.11-setuptools python3.11-devel gcc make && \
+    ln -s /bin/pip3.11 /bin/pip && ln -s /bin/python3.11 /bin/python && dnf clean all

 ENV UV_SYSTEM_PYTHON=1
 RUN pip install uv
@@ -86,7 +113,7 @@ WORKDIR /app

 RUN apt-get update && apt-get install -y \
     iputils-ping net-tools iproute2 dnsutils telnet \
-    curl wget telnet \
+    curl wget telnet git\
     procps psmisc lsof \
     traceroute \
     bubblewrap \
@@ -115,6 +142,45 @@ EOF
 done
 fi

+# Function to get Python command
+get_python_cmd() {
+  if is_command_available python; then
+    echo "python"
+  elif is_command_available python3; then
+    echo "python3"
+  else
+    echo "Error: Neither python nor python3 is installed. Please install Python to continue." >&2
+    exit 1
+  fi
+}
+
+if [ -n "$run_config" ]; then
+  # Copy the run config to the build context since it's an absolute path
+  cp "$run_config" "$BUILD_CONTEXT_DIR/run.yaml"
+  add_to_container << EOF
+COPY run.yaml $RUN_CONFIG_PATH
+EOF
+
+  # Parse the run.yaml configuration to identify external provider directories
+  # If external providers are specified, copy their directory to the container
+  # and update the configuration to reference the new container path
+  python_cmd=$(get_python_cmd)
+  external_providers_dir=$($python_cmd -c "import yaml; config = yaml.safe_load(open('$run_config')); print(config.get('external_providers_dir') or '')")
+  if [ -n "$external_providers_dir" ]; then
+    echo "Copying external providers directory: $external_providers_dir"
+    add_to_container << EOF
+COPY $external_providers_dir /app/providers.d
+EOF
+    # Edit the run.yaml file to change the external_providers_dir to /app/providers.d
+    if [ "$(uname)" = "Darwin" ]; then
+      sed -i.bak -e 's|external_providers_dir:.*|external_providers_dir: /app/providers.d|' "$BUILD_CONTEXT_DIR/run.yaml"
+      rm -f "$BUILD_CONTEXT_DIR/run.yaml.bak"
+    else
+      sed -i 's|external_providers_dir:.*|external_providers_dir: /app/providers.d|' "$BUILD_CONTEXT_DIR/run.yaml"
+    fi
+  fi
+fi
+
 stack_mount="/app/llama-stack-source"
 client_mount="/app/llama-stack-client-source"

@@ -174,15 +240,16 @@ fi
 RUN pip uninstall -y uv
 EOF

-# if template_or_config ends with .yaml, it is not a template and we should not use the --template flag
-if [[ "$template_or_config" != *.yaml ]]; then
+# If a run config is provided, we use the --config flag
+if [[ -n "$run_config" ]]; then
+  add_to_container << EOF
+ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--config", "$RUN_CONFIG_PATH"]
+EOF
+# If a template is provided (not a yaml file), we use the --template flag
+elif [[ "$template_or_config" != *.yaml ]]; then
   add_to_container << EOF
 ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--template", "$template_or_config"]
 EOF
 else
   add_to_container << EOF
 ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server"]
 EOF
 fi

 # Add other required item commands generic to all containers
@@ -254,9 +321,10 @@ $CONTAINER_BINARY build \
   "${CLI_ARGS[@]}" \
   -t "$image_tag" \
   -f "$TEMP_DIR/Containerfile" \
-  "."
+  "$BUILD_CONTEXT_DIR"

+# clean up tmp/configs
+rm -f "$BUILD_CONTEXT_DIR/run.yaml"
 set +x

 echo "Success!"
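The script above now takes its optional arguments positionally after the four required ones and tells a run config apart from special pip dependencies by the .yaml suffix. A hedged sketch of how the calling side might assemble that argument list (the script location and all values below are placeholders, not taken from this diff):

    # Placeholder inputs for the sketch.
    script = "llama_stack/distribution/build_container.sh"  # assumed location
    template_or_config = "my-build.yaml"
    image_name = "my-distro"
    container_base = "python:3.10-slim"
    normal_deps = ["fastapi", "uvicorn"]
    special_deps = []
    run_config = "my-run.yaml"

    args = [script, template_or_config, image_name, container_base, " ".join(normal_deps)]
    if run_config is not None:
        args.append(run_config)  # ends in .yaml, so the script treats it as run_config
    if special_deps:
        args.append(" ".join(special_deps))
    print(args)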
@@ -326,3 +326,12 @@ class BuildConfig(BaseModel):
         default="conda",
         description="Type of package to build (conda | container | venv)",
     )
+    image_name: Optional[str] = Field(
+        default=None,
+        description="Name of the distribution to build",
+    )
+    external_providers_dir: Optional[str] = Field(
+        default=None,
+        description="Path to directory containing external provider implementations. The providers packages will be resolved from this directory. "
+        "pip_packages MUST contain the provider package name.",
+    )
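A hedged sketch of a build configuration exercising the two new fields; the values and the distribution_spec contents are placeholders, and only the field names image_name and external_providers_dir come from the change above:

    build_config_data = {
        "distribution_spec": {
            "description": "example distro with an out-of-tree provider",
            "providers": {"inference": ["remote::ollama"]},
        },
        "image_type": "container",
        "image_name": "my-distro",
        "external_providers_dir": "/path/to/providers.d",
    }
    # e.g. BuildConfig(**build_config_data), once any remaining required fields are filled in
    print(build_config_data["external_providers_dir"])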
@@ -12,7 +12,6 @@ from typing import Any, Dict, List
 import yaml
 from pydantic import BaseModel

-from llama_stack.distribution.datatypes import StackRunConfig
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import (
     AdapterSpec,
@@ -97,7 +96,9 @@ def _load_inline_provider_spec(spec_data: Dict[str, Any], api: Api, provider_nam
     return spec


-def get_provider_registry(config: StackRunConfig | None = None) -> Dict[Api, Dict[str, ProviderSpec]]:
+def get_provider_registry(
+    config=None,
+) -> Dict[Api, Dict[str, ProviderSpec]]:
     """Get the provider registry, optionally including external providers.

     This function loads both built-in providers and external providers from YAML files.
@@ -122,7 +123,7 @@ def get_provider_registry(config: StackRunConfig | None = None) -> Dict[Api, Dic
         llama-guard.yaml

     Args:
-        config: Optional StackRunConfig containing the external providers directory path
+        config: Optional object containing the external providers directory path

     Returns:
         A dictionary mapping APIs to their available providers
@@ -142,7 +143,8 @@ def get_provider_registry(config: StackRunConfig | None = None) -> Dic
     except ImportError as e:
         logger.warning(f"Failed to import module {name}: {e}")

-    if config and config.external_providers_dir:
+    # Check if config has the external_providers_dir attribute
+    if config and hasattr(config, "external_providers_dir") and config.external_providers_dir:
         external_providers_dir = os.path.abspath(config.external_providers_dir)
         if not os.path.exists(external_providers_dir):
             raise FileNotFoundError(f"External providers directory not found: {external_providers_dir}")
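Since the registry helper now only checks for an external_providers_dir attribute instead of requiring a StackRunConfig, both run-time and build-time configs can be passed in. A minimal sketch with an illustrative stand-in object:

    from dataclasses import dataclass

    from llama_stack.distribution.distribution import get_provider_registry

    @dataclass
    class _AnyConfig:
        # Any object exposing this attribute satisfies the new hasattr() check;
        # leaving it None falls back to built-in providers only.
        external_providers_dir: str | None = None

    registry = get_provider_registry(_AnyConfig())
    # registry = get_provider_registry(_AnyConfig("providers.d"))  # also loads external provider specs
    print(sorted(api.value for api in registry))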
@@ -8,6 +8,11 @@ import asyncio
 import time
 from typing import Any, AsyncGenerator, AsyncIterator, Dict, List, Optional, Union

+from openai.types.chat import ChatCompletionToolChoiceOptionParam as OpenAIChatCompletionToolChoiceOptionParam
+from openai.types.chat import ChatCompletionToolParam as OpenAIChatCompletionToolParam
+from pydantic import Field, TypeAdapter
+from typing_extensions import Annotated
+
 from llama_stack.apis.common.content_types import (
     URL,
     InterleavedContent,
@@ -526,7 +531,7 @@ class InferenceRouter(Inference):
     async def openai_chat_completion(
         self,
         model: str,
-        messages: List[OpenAIMessageParam],
+        messages: Annotated[List[OpenAIMessageParam], Field(..., min_length=1)],
         frequency_penalty: Optional[float] = None,
         function_call: Optional[Union[str, Dict[str, Any]]] = None,
         functions: Optional[List[Dict[str, Any]]] = None,
@@ -558,6 +563,16 @@ class InferenceRouter(Inference):
         if model_obj.model_type == ModelType.embedding:
             raise ValueError(f"Model '{model}' is an embedding model and does not support chat completions")

+        # Use the OpenAI client for a bit of extra input validation without
+        # exposing the OpenAI client itself as part of our API surface
+        if tool_choice:
+            TypeAdapter(OpenAIChatCompletionToolChoiceOptionParam).validate_python(tool_choice)
+            if tools is None:
+                raise ValueError("'tool_choice' is only allowed when 'tools' is also provided")
+        if tools:
+            for tool in tools:
+                TypeAdapter(OpenAIChatCompletionToolParam).validate_python(tool)
+
         params = dict(
             model=model_obj.identifier,
             messages=messages,
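The router change above borrows the OpenAI client's parameter types purely for input validation. A self-contained sketch of that validation layer on its own, assuming the openai and pydantic packages used by the router are installed:

    from openai.types.chat import ChatCompletionToolChoiceOptionParam, ChatCompletionToolParam
    from pydantic import TypeAdapter, ValidationError

    # A plain tool_choice string such as "auto" passes validation.
    TypeAdapter(ChatCompletionToolChoiceOptionParam).validate_python("auto")

    # A tool definition missing its "function" body is rejected before it reaches a provider.
    try:
        TypeAdapter(ChatCompletionToolParam).validate_python({"type": "function"})
    except ValidationError as exc:
        print(f"rejected: {exc.error_count()} validation error(s)")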
@@ -22,6 +22,7 @@ from fastapi import Body, FastAPI, HTTPException, Request
 from fastapi import Path as FastapiPath
 from fastapi.exceptions import RequestValidationError
 from fastapi.responses import JSONResponse, StreamingResponse
+from openai import BadRequestError
 from pydantic import BaseModel, ValidationError
 from typing_extensions import Annotated

@@ -92,7 +93,7 @@ async def global_exception_handler(request: Request, exc: Exception):

 def translate_exception(exc: Exception) -> Union[HTTPException, RequestValidationError]:
     if isinstance(exc, ValidationError):
-        exc = RequestValidationError(exc.raw_errors)
+        exc = RequestValidationError(exc.errors())

     if isinstance(exc, RequestValidationError):
         return HTTPException(
@@ -110,6 +111,8 @@ def translate_exception(exc: Exception) -> Union[HTTPException, RequestValidationError]:
         )
     elif isinstance(exc, ValueError):
         return HTTPException(status_code=400, detail=f"Invalid value: {str(exc)}")
+    elif isinstance(exc, BadRequestError):
+        return HTTPException(status_code=400, detail=str(exc))
     elif isinstance(exc, PermissionError):
         return HTTPException(status_code=403, detail=f"Permission denied: {str(exc)}")
     elif isinstance(exc, TimeoutError):
@@ -162,14 +165,17 @@ async def maybe_await(value):
     return value


-async def sse_generator(event_gen):
+async def sse_generator(event_gen_coroutine):
+    event_gen = None
     try:
-        async for item in await event_gen:
+        event_gen = await event_gen_coroutine
+        async for item in event_gen:
             yield create_sse_event(item)
             await asyncio.sleep(0.01)
     except asyncio.CancelledError:
         logger.info("Generator cancelled")
-        await event_gen.aclose()
+        if event_gen:
+            await event_gen.aclose()
     except Exception as e:
         logger.exception("Error in sse_generator")
         yield create_sse_event(
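The sse_generator fix resolves the event-generator coroutine inside the try block and only closes the generator if it was actually created, so a cancellation arriving before the stream starts no longer touches an unset generator. A standalone sketch of the same pattern (the names here are illustrative, not the server's API):

    import asyncio

    async def event_stream():          # stand-in for an impl method's event stream
        for i in range(3):
            yield i

    async def make_stream():           # the coroutine handed to the generator
        return event_stream()

    async def sse_like(event_gen_coroutine):
        event_gen = None
        try:
            event_gen = await event_gen_coroutine   # resolved inside try, as in the fix
            async for item in event_gen:
                print("data:", item)
        except asyncio.CancelledError:
            if event_gen:                           # guard: cancellation may precede creation
                await event_gen.aclose()

    asyncio.run(sse_like(make_stream()))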
@@ -455,6 +461,7 @@ def main(args: Optional[argparse.Namespace] = None):
             raise ValueError(f"Could not find method {endpoint.name} on {impl}!!")

         impl_method = getattr(impl, endpoint.name)
+        logger.debug(f"{endpoint.method.upper()} {endpoint.route}")

         with warnings.catch_warnings():
             warnings.filterwarnings("ignore", category=UserWarning, module="pydantic._internal._fields")

@@ -24,6 +24,13 @@ def rag_chat_page():
     def should_disable_input():
         return "displayed_messages" in st.session_state and len(st.session_state.displayed_messages) > 0

+    def log_message(message):
+        with st.chat_message(message["role"]):
+            if "tool_output" in message and message["tool_output"]:
+                with st.expander(label="Tool Output", expanded=False, icon="🛠"):
+                    st.write(message["tool_output"])
+            st.markdown(message["content"])
+
     with st.sidebar:
         # File/Directory Upload Section
         st.subheader("Upload Documents", divider=True)
@@ -146,8 +153,7 @@ def rag_chat_page():

     # Display chat history
     for message in st.session_state.displayed_messages:
-        with st.chat_message(message["role"]):
-            st.markdown(message["content"])
+        log_message(message)

     if temperature > 0.0:
         strategy = {
@@ -201,7 +207,7 @@ def rag_chat_page():

         # Display assistant response
         with st.chat_message("assistant"):
-            retrieval_message_placeholder = st.empty()
+            retrieval_message_placeholder = st.expander(label="Tool Output", expanded=False, icon="🛠")
             message_placeholder = st.empty()
             full_response = ""
             retrieval_response = ""
@@ -209,14 +215,16 @@ def rag_chat_page():
                 log.print()
                 if log.role == "tool_execution":
                     retrieval_response += log.content.replace("====", "").strip()
-                    retrieval_message_placeholder.info(retrieval_response)
+                    retrieval_message_placeholder.write(retrieval_response)
                 else:
                     full_response += log.content
                     message_placeholder.markdown(full_response + "▌")
             message_placeholder.markdown(full_response)

         st.session_state.messages.append({"role": "assistant", "content": full_response})
-        st.session_state.displayed_messages.append({"role": "assistant", "content": full_response})
+        st.session_state.displayed_messages.append(
+            {"role": "assistant", "content": full_response, "tool_output": retrieval_response}
+        )

     def direct_process_prompt(prompt):
         # Add the system prompt in the beginning of the conversation
@@ -230,15 +238,14 @@ def rag_chat_page():
         prompt_context = rag_response.content

         with st.chat_message("assistant"):
-            retrieval_message_placeholder = st.empty()
+            with st.expander(label="Retrieval Output", expanded=False):
+                st.write(prompt_context)
+
             message_placeholder = st.empty()
             full_response = ""
             retrieval_response = ""

-            # Display the retrieved content
-            retrieval_response += str(prompt_context)
-            retrieval_message_placeholder.info(retrieval_response)
-
             # Construct the extended prompt
             extended_prompt = f"Please answer the following query using the context below.\n\nCONTEXT:\n{prompt_context}\n\nQUERY:\n{prompt}"

@@ -4,14 +4,23 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

+import enum
+import json
 import uuid

 import streamlit as st
 from llama_stack_client import Agent
+from llama_stack_client.lib.agents.react.agent import ReActAgent
+from llama_stack_client.lib.agents.react.tool_parser import ReActOutput

 from llama_stack.distribution.ui.modules.api import llama_stack_api


+class AgentType(enum.Enum):
+    REGULAR = "Regular"
+    REACT = "ReAct"
+
+
 def tool_chat_page():
     st.title("🛠 Tools")

@@ -23,50 +32,117 @@ def tool_chat_page():
     tool_groups_list = [tool_group.identifier for tool_group in tool_groups]
     mcp_tools_list = [tool for tool in tool_groups_list if tool.startswith("mcp::")]
     builtin_tools_list = [tool for tool in tool_groups_list if not tool.startswith("mcp::")]
+    selected_vector_dbs = []

     def reset_agent():
         st.session_state.clear()
         st.cache_resource.clear()

     with st.sidebar:
+        st.title("Configuration")
         st.subheader("Model")
-        model = st.selectbox(label="models", options=model_list, on_change=reset_agent)
+        model = st.selectbox(label="Model", options=model_list, on_change=reset_agent, label_visibility="collapsed")

-        st.subheader("Available ToolGroups")
+        st.subheader("Builtin Tools")
         toolgroup_selection = st.pills(
-            label="Available ToolGroups", options=builtin_tools_list, selection_mode="multi", on_change=reset_agent
+            label="Built-in tools",
+            options=builtin_tools_list,
+            selection_mode="multi",
+            on_change=reset_agent,
+            format_func=lambda tool: "".join(tool.split("::")[1:]),
+            help="List of built-in tools from your llama stack server.",
         )

-        st.subheader("MCP Servers")
+        if "builtin::rag" in toolgroup_selection:
+            vector_dbs = llama_stack_api.client.vector_dbs.list() or []
+            if not vector_dbs:
+                st.info("No vector databases available for selection.")
+            vector_dbs = [vector_db.identifier for vector_db in vector_dbs]
+            selected_vector_dbs = st.multiselect(
+                label="Select Document Collections to use in RAG queries",
+                options=vector_dbs,
+                on_change=reset_agent,
+            )
+
         mcp_selection = st.pills(
-            label="Available MCP Servers", options=mcp_tools_list, selection_mode="multi", on_change=reset_agent
+            label="MCP Servers",
+            options=mcp_tools_list,
+            selection_mode="multi",
+            on_change=reset_agent,
+            format_func=lambda tool: "".join(tool.split("::")[1:]),
+            help="List of MCP servers registered to your llama stack server.",
        )

         toolgroup_selection.extend(mcp_selection)

-        active_tool_list = []
-        for toolgroup_id in toolgroup_selection:
-            active_tool_list.extend(
-                [
-                    f"{''.join(toolgroup_id.split('::')[1:])}:{t.identifier}"
-                    for t in client.tools.list(toolgroup_id=toolgroup_id)
-                ]
-            )
+        grouped_tools = {}
+        total_tools = 0

-        st.subheader(f"Active Tools: 🛠 {len(active_tool_list)}")
-        st.json(active_tool_list)
+        for toolgroup_id in toolgroup_selection:
+            tools = client.tools.list(toolgroup_id=toolgroup_id)
+            grouped_tools[toolgroup_id] = [tool.identifier for tool in tools]
+            total_tools += len(tools)
+
+        st.markdown(f"Active Tools: 🛠 {total_tools}")
+
+        for group_id, tools in grouped_tools.items():
+            with st.expander(f"🔧 Tools from `{group_id}`"):
+                for idx, tool in enumerate(tools, start=1):
+                    st.markdown(f"{idx}. `{tool.split(':')[-1]}`")

-        st.subheader("Agent Configurations")
+        st.subheader("Agent Type")
+        agent_type = st.radio(
+            "Select Agent Type",
+            [AgentType.REGULAR, AgentType.REACT],
+            format_func=lambda x: x.value,
+            on_change=reset_agent,
+        )
+
+        max_tokens = st.slider(
+            "Max Tokens",
+            min_value=0,
+            max_value=4096,
+            value=512,
+            step=64,
+            help="The maximum number of tokens to generate",
+            on_change=reset_agent,
+        )
+
+    for i, tool_name in enumerate(toolgroup_selection):
+        if tool_name == "builtin::rag":
+            tool_dict = dict(
+                name="builtin::rag",
+                args={
+                    "vector_db_ids": list(selected_vector_dbs),
+                },
+            )
+            toolgroup_selection[i] = tool_dict
+
     @st.cache_resource
     def create_agent():
-        return Agent(
-            client,
-            model=model,
-            instructions="You are a helpful assistant. When you use a tool always respond with a summary of the result.",
-            tools=toolgroup_selection,
-            sampling_params={
-                "strategy": {"type": "greedy"},
-            },
-        )
+        if "agent_type" in st.session_state and st.session_state.agent_type == AgentType.REACT:
+            return ReActAgent(
+                client=client,
+                model=model,
+                tools=toolgroup_selection,
+                response_format={
+                    "type": "json_schema",
+                    "json_schema": ReActOutput.model_json_schema(),
+                },
+                sampling_params={"strategy": {"type": "greedy"}, "max_tokens": max_tokens},
+            )
+        else:
+            return Agent(
+                client,
+                model=model,
+                instructions="You are a helpful assistant. When you use a tool always respond with a summary of the result.",
+                tools=toolgroup_selection,
+                sampling_params={"strategy": {"type": "greedy"}, "max_tokens": max_tokens},
+            )
+
+    st.session_state.agent_type = agent_type

     agent = create_agent()

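The sidebar above now groups tool identifiers per toolgroup instead of flattening them into one list. A standalone sketch of that grouping with a stubbed listing in place of client.tools.list (the toolgroup names and tools are made up):

    toolgroup_selection = ["builtin::websearch", "mcp::filesystem"]
    listings = {
        "builtin::websearch": ["web_search"],
        "mcp::filesystem": ["read_file", "list_dir"],
    }

    grouped_tools = {}
    total_tools = 0
    for toolgroup_id in toolgroup_selection:
        tools = listings[toolgroup_id]          # real code: client.tools.list(toolgroup_id=...)
        grouped_tools[toolgroup_id] = list(tools)
        total_tools += len(tools)

    print(f"Active Tools: {total_tools}")       # 3
    for group_id, tools in grouped_tools.items():
        print(group_id, "->", tools)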
@@ -95,6 +171,158 @@ def tool_chat_page():
         )

     def response_generator(turn_response):
+        if st.session_state.get("agent_type") == AgentType.REACT:
+            return _handle_react_response(turn_response)
+        else:
+            return _handle_regular_response(turn_response)
+
+    def _handle_react_response(turn_response):
+        current_step_content = ""
+        final_answer = None
+        tool_results = []
+
+        for response in turn_response:
+            if not hasattr(response.event, "payload"):
+                yield (
+                    "\n\n🚨 :red[_Llama Stack server Error:_]\n"
+                    "The response received is missing an expected `payload` attribute.\n"
+                    "This could indicate a malformed response or an internal issue within the server.\n\n"
+                    f"Error details: {response}"
+                )
+                return
+
+            payload = response.event.payload
+
+            if payload.event_type == "step_progress" and hasattr(payload.delta, "text"):
+                current_step_content += payload.delta.text
+                continue
+
+            if payload.event_type == "step_complete":
+                step_details = payload.step_details
+
+                if step_details.step_type == "inference":
+                    yield from _process_inference_step(current_step_content, tool_results, final_answer)
+                    current_step_content = ""
+                elif step_details.step_type == "tool_execution":
+                    tool_results = _process_tool_execution(step_details, tool_results)
+                    current_step_content = ""
+                else:
+                    current_step_content = ""
+
+        if not final_answer and tool_results:
+            yield from _format_tool_results_summary(tool_results)
+
+    def _process_inference_step(current_step_content, tool_results, final_answer):
+        try:
+            react_output_data = json.loads(current_step_content)
+            thought = react_output_data.get("thought")
+            action = react_output_data.get("action")
+            answer = react_output_data.get("answer")
+
+            if answer and answer != "null" and answer is not None:
+                final_answer = answer
+
+            if thought:
+                with st.expander("🤔 Thinking...", expanded=False):
+                    st.markdown(f":grey[__{thought}__]")
+
+            if action and isinstance(action, dict):
+                tool_name = action.get("tool_name")
+                tool_params = action.get("tool_params")
+                with st.expander(f'🛠 Action: Using tool "{tool_name}"', expanded=False):
+                    st.json(tool_params)
+
+            if answer and answer != "null" and answer is not None:
+                yield f"\n\n✅ **Final Answer:**\n{answer}"
+
+        except json.JSONDecodeError:
+            yield f"\n\nFailed to parse ReAct step content:\n```json\n{current_step_content}\n```"
+        except Exception as e:
+            yield f"\n\nFailed to process ReAct step: {e}\n```json\n{current_step_content}\n```"
+
+        return final_answer
+
+    def _process_tool_execution(step_details, tool_results):
+        try:
+            if hasattr(step_details, "tool_responses") and step_details.tool_responses:
+                for tool_response in step_details.tool_responses:
+                    tool_name = tool_response.tool_name
+                    content = tool_response.content
+                    tool_results.append((tool_name, content))
+                    with st.expander(f'⚙️ Observation (Result from "{tool_name}")', expanded=False):
+                        try:
+                            parsed_content = json.loads(content)
+                            st.json(parsed_content)
+                        except json.JSONDecodeError:
+                            st.code(content, language=None)
+            else:
+                with st.expander("⚙️ Observation", expanded=False):
+                    st.markdown(":grey[_Tool execution step completed, but no response data found._]")
+        except Exception as e:
+            with st.expander("⚙️ Error in Tool Execution", expanded=False):
+                st.markdown(f":red[_Error processing tool execution: {str(e)}_]")
+
+        return tool_results
+
+    def _format_tool_results_summary(tool_results):
+        yield "\n\n**Here's what I found:**\n"
+        for tool_name, content in tool_results:
+            try:
+                parsed_content = json.loads(content)
+
+                if tool_name == "web_search" and "top_k" in parsed_content:
+                    yield from _format_web_search_results(parsed_content)
+                elif "results" in parsed_content and isinstance(parsed_content["results"], list):
+                    yield from _format_results_list(parsed_content["results"])
+                elif isinstance(parsed_content, dict) and len(parsed_content) > 0:
+                    yield from _format_dict_results(parsed_content)
+                elif isinstance(parsed_content, list) and len(parsed_content) > 0:
+                    yield from _format_list_results(parsed_content)
+            except json.JSONDecodeError:
+                yield f"\n**{tool_name}** was used but returned complex data. Check the observation for details.\n"
+            except (TypeError, AttributeError, KeyError, IndexError) as e:
+                print(f"Error processing {tool_name} result: {type(e).__name__}: {e}")
+
+    def _format_web_search_results(parsed_content):
+        for i, result in enumerate(parsed_content["top_k"], 1):
+            if i <= 3:
+                title = result.get("title", "Untitled")
+                url = result.get("url", "")
+                content_text = result.get("content", "").strip()
+                yield f"\n- **{title}**\n  {content_text}\n  [Source]({url})\n"
+
+    def _format_results_list(results):
+        for i, result in enumerate(results, 1):
+            if i <= 3:
+                if isinstance(result, dict):
+                    name = result.get("name", result.get("title", "Result " + str(i)))
+                    description = result.get("description", result.get("content", result.get("summary", "")))
+                    yield f"\n- **{name}**\n  {description}\n"
+                else:
+                    yield f"\n- {result}\n"
+
+    def _format_dict_results(parsed_content):
+        yield "\n```\n"
+        for key, value in list(parsed_content.items())[:5]:
+            if isinstance(value, str) and len(value) < 100:
+                yield f"{key}: {value}\n"
+            else:
+                yield f"{key}: [Complex data]\n"
+        yield "```\n"
+
+    def _format_list_results(parsed_content):
+        yield "\n"
+        for _, item in enumerate(parsed_content[:3], 1):
+            if isinstance(item, str):
+                yield f"- {item}\n"
+            elif isinstance(item, dict) and "text" in item:
+                yield f"- {item['text']}\n"
+            elif isinstance(item, dict) and len(item) > 0:
+                first_value = next(iter(item.values()))
+                if isinstance(first_value, str) and len(first_value) < 100:
+                    yield f"- {first_value}\n"
+
+    def _handle_regular_response(turn_response):
         for response in turn_response:
             if hasattr(response.event, "payload"):
                 print(response.event.payload)
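The ReAct handler added above expects each inference step to carry a JSON object with thought, action, and answer keys, where action holds tool_name and tool_params. An illustrative payload (the values are made up):

    import json

    react_step = '''
    {
      "thought": "I should look this up with the web_search tool.",
      "action": {"tool_name": "web_search", "tool_params": {"query": "llama stack"}},
      "answer": null
    }
    '''
    parsed = json.loads(react_step)
    print(parsed["action"]["tool_name"])   # -> web_search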
@@ -103,14 +331,18 @@ def tool_chat_page():
                     yield response.event.payload.delta.text
                 if response.event.payload.event_type == "step_complete":
                     if response.event.payload.step_details.step_type == "tool_execution":
-                        yield " 🛠 "
+                        if response.event.payload.step_details.tool_calls:
+                            tool_name = str(response.event.payload.step_details.tool_calls[0].tool_name)
+                            yield f'\n\n🛠 :grey[_Using "{tool_name}" tool:_]\n\n'
+                        else:
+                            yield "No tool_calls present in step_details"
             else:
                 yield f"Error occurred in the Llama Stack Cluster: {response}"

     with st.chat_message("assistant"):
-        response = st.write_stream(response_generator(turn_response))
+        response_content = st.write_stream(response_generator(turn_response))

-    st.session_state.messages.append({"role": "assistant", "content": response})
+    st.session_state.messages.append({"role": "assistant", "content": response_content})


 tool_chat_page()